def _eval_block(iblock, start, stop, nstates, total_fluxes, cond_fluxes, rates, mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    """Evaluate ``mcbs_ci_correl`` on one block of flux and rate data.

    For every state ``i`` the estimate is taken over ``total_fluxes[:, i]``;
    for every ordered pair ``i != j`` it is taken over ``cond_fluxes[:, i, j]``
    and ``rates[:, i, j]``. ``numpy.mean`` is used as both estimator and
    subsampler in every call.

    Returns a list of three lists:

    * ``[0]`` -- ``(iblock, istate, (start, stop) + ci_res)`` per state
      (target fluxes),
    * ``[1]`` -- ``(iblock, istate, jstate, (start, stop) + ci_res)`` per pair
      (conditional fluxes),
    * ``[2]`` -- same layout as ``[1]``, for rates.

    ``ci_res`` is the tuple returned by ``mcbs_ci_correl``; ``iblock``,
    ``start`` and ``stop`` are only used to label the results.
    """
    def _mean_ci(series):
        # Single place for the shared mcbs_ci_correl settings.
        return mcbs_ci_correl(series, estimator=numpy.mean, alpha=mcbs_alpha, n_sets=mcbs_nsets,
                              autocorrel_alpha=mcbs_acalpha, subsample=numpy.mean)

    window = (start, stop)
    target_results, condflux_results, rate_results = [], [], []

    # ``range`` (not ``xrange``) so the block also runs on Python 3; the call
    # order (total flux, then conditional flux and rate per pair) is unchanged.
    for istate in range(nstates):
        target_results.append((iblock, istate, window + _mean_ci(total_fluxes[:, istate])))

        for jstate in range(nstates):
            if istate == jstate:
                continue
            condflux_results.append(
                (iblock, istate, jstate, window + _mean_ci(cond_fluxes[:, istate, jstate])))
            rate_results.append(
                (iblock, istate, jstate, window + _mean_ci(rates[:, istate, jstate])))

    return [target_results, condflux_results, rate_results]
def _2D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                   mcbs_enable, estimator_kwargs):
    """Evaluate the reweighting estimator for every ordered state pair ``i != j``.

    Each pair gets its own ``mcbs_ci_correl`` call with ``reweight_for_c`` as
    the estimator and a dataset holding the indices
    ``start - 1 .. stop - 2`` (as ``uint16``).

    ``estimator_kwargs`` supplies the extra keyword arguments for the
    estimator; ``istate``, ``jstate`` and ``nstates`` are added per pair on a
    copy, so the caller's dictionary is left untouched. ``data_input`` is not
    referenced in this function.

    Returns a list of ``(name, iblock, istate, jstate, (start, stop) + ci_res)``
    tuples, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``.
    """
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    for istate in range(nstates):
        for jstate in range(nstates):
            if istate == jstate:
                continue
            # Work on a per-pair copy instead of ``estimator_kwargs.update(...)``:
            # updating in place leaks istate/jstate/nstates into the caller's
            # dict, which is unsafe if that dict is shared between blocks.
            pair_kwargs = dict(estimator_kwargs, istate=istate, jstate=jstate, nstates=nstates)
            # NOTE(review): uint16 cannot represent indices above 65535 (or a
            # negative start - 1); confirm the dtype the estimator expects
            # before widening it.
            dataset = {'indices': np.array(list(range(start - 1, stop - 1)), dtype=np.uint16)}
            ci_res = mcbs_ci_correl(dataset, estimator=reweight_for_c, alpha=mcbs_alpha, n_sets=mcbs_nsets,
                                    autocorrel_alpha=mcbs_acalpha, subsample=(lambda x: x[0]),
                                    do_correl=do_correl, mcbs_enable=mcbs_enable,
                                    estimator_kwargs=pair_kwargs)
            results.append((name, iblock, istate, jstate, (start, stop) + ci_res))
    return results
def _rate_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                     mcbs_enable):
    """Evaluate the rate estimator for every ordered state pair ``i != j``.

    For each pair, ``mcbs_ci_correl`` is called with
    ``sequence_macro_flux_to_rate`` as estimator on a dataset made of
    ``data_input['dataset'][:, i, j]`` and the complete ``data_input['pops']``.

    Returns a list of ``(name, iblock, istate, jstate, (start, stop) + ci_res)``
    tuples, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``.
    """
    # Our rate estimator is a little more complex, so this is a custom
    # evaluation block rather than one of the imported block evaluators.
    window = (start, stop)
    state_pairs = [(i, j) for i in range(nstates) for j in range(nstates) if i != j]

    collected = []
    for istate, jstate in state_pairs:
        pair_kwargs = dict(istate=istate, jstate=jstate)

        # Why send the whole population dataset instead of a slice?
        # The estimator requires it: populations for both i and j are needed to
        # normalize properly and to avoid i -> j rate constants being affected
        # by a third state k, and passing everything is the simplest way.
        pair_dataset = dict(dataset=data_input['dataset'][:, istate, jstate],
                            pops=data_input['pops'])

        ci_res = mcbs_ci_correl(pair_dataset,
                                estimator=sequence_macro_flux_to_rate,
                                alpha=mcbs_alpha,
                                n_sets=mcbs_nsets,
                                autocorrel_alpha=mcbs_acalpha,
                                subsample=numpy.mean,
                                do_correl=do_correl,
                                mcbs_enable=mcbs_enable,
                                estimator_kwargs=pair_kwargs)
        collected.append((name, iblock, istate, jstate, window + ci_res))
    return collected
def _1D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                   mcbs_enable, estimator_kwargs):
    """Evaluate the reweighting estimator once per state.

    Each state gets its own ``mcbs_ci_correl`` call with ``reweight_for_c`` as
    the estimator and a dataset holding the indices
    ``start - 1 .. stop - 2`` (as ``uint16``).

    ``estimator_kwargs`` supplies the extra keyword arguments for the
    estimator; ``istate``, ``jstate`` and ``nstates`` are added per state on a
    copy, so the caller's dictionary is left untouched. ``data_input`` is not
    referenced in this function.

    Returns a list of ``(name, iblock, istate, (start, stop) + ci_res)``
    tuples, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``.
    """
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    for istate in range(nstates):
        # A little hack to make our estimator play nice, as jstate must be there.
        # For 1D datasets (state probabilities, etc), the argument isn't used in our estimator,
        # and so any variable which has the proper type is fine.
        #
        # The keys are set on a copy rather than via ``estimator_kwargs.update``
        # so the caller's dict (possibly shared between blocks) is not mutated.
        state_kwargs = dict(estimator_kwargs, istate=istate, jstate=istate, nstates=nstates)
        # NOTE(review): uint16 cannot represent indices above 65535 (or a
        # negative start - 1); confirm the dtype the estimator expects before
        # widening it.
        dataset = {'indices': np.array(list(range(start - 1, stop - 1)), dtype=np.uint16)}
        ci_res = mcbs_ci_correl(dataset, estimator=reweight_for_c, alpha=mcbs_alpha, n_sets=mcbs_nsets,
                                autocorrel_alpha=mcbs_acalpha, subsample=(lambda x: x[0]),
                                do_correl=do_correl, mcbs_enable=mcbs_enable,
                                estimator_kwargs=state_kwargs)
        results.append((name, iblock, istate, (start, stop) + ci_res))
    return results
def calc_evol_flux(self):
    """Compute and store the cumulative evolution of flux confidence intervals.

    For every target in ``self.fluxdata`` the flux series is re-analysed over
    growing prefixes ``flux[:istop]`` (``istop`` advances by
    ``self.evol_step`` per block) with ``mclib.mcbs_ci_correl``. The per-block
    results are written to a ``flux_evolution`` dataset in the target's HDF5
    group (``self.target_groups[target_label]``), and the MCBS settings are
    recorded as attributes on that dataset.
    """
    westpa.rc.pstatus('Calculating cumulative evolution of flux confidence intervals every {} iteration(s)'
                      .format(self.evol_step))

    # ``items()`` / ``range`` instead of ``iteritems()`` / ``xrange`` so this
    # runs on Python 3 as well as Python 2.
    for target_label, target_fluxdata in self.fluxdata.items():
        target_group = self.target_groups[target_label]
        iter_start = target_group['n_iter'][0]
        iter_stop = target_group['n_iter'][-1]
        iter_count = iter_stop - iter_start

        # Number of blocks, rounding up so a partial final block is included.
        n_blocks = iter_count // self.evol_step
        if iter_count % self.evol_step > 0:
            n_blocks += 1

        cis = numpy.empty((n_blocks,), dtype=ci_dtype)

        for iblock in range(n_blocks):
            block_iter_stop = min(iter_start + (iblock + 1) * self.evol_step, iter_stop)
            istop = min((iblock + 1) * self.evol_step, len(target_fluxdata['flux']))
            # Cumulative: always start from the first stored flux value.
            fluxes = target_fluxdata['flux'][:istop]

            avg, ci_lb, ci_ub, correl_len = mclib.mcbs_ci_correl(fluxes, numpy.mean, self.alpha, self.n_sets,
                                                                 autocorrel_alpha=self.autocorrel_alpha,
                                                                 subsample=numpy.mean)
            cis[iblock]['iter_start'] = iter_start
            cis[iblock]['iter_stop'] = block_iter_stop
            cis[iblock]['expected'], cis[iblock]['ci_lbound'], cis[iblock]['ci_ubound'] = avg, ci_lb, ci_ub
            cis[iblock]['corr_len'] = correl_len

        cis_ds = target_group.create_dataset('flux_evolution', data=cis)
        cis_ds.attrs['iter_step'] = self.evol_step
        cis_ds.attrs['mcbs_alpha'] = self.alpha
        cis_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
        cis_ds.attrs['mcbs_n_sets'] = self.n_sets
def _eval_block(iblock, istate, start, stop, state_pops, mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    """Run ``mcbs_ci_correl`` on ``state_pops`` for one block and one state.

    ``numpy.mean`` is used as both estimator and subsampler. Returns
    ``(iblock, istate, (start, stop) + ci_res)``, where ``ci_res`` is the tuple
    returned by ``mcbs_ci_correl``; ``iblock``, ``istate``, ``start`` and
    ``stop`` only label the result.
    """
    mcbs_settings = dict(
        estimator=numpy.mean,
        alpha=mcbs_alpha,
        n_sets=mcbs_nsets,
        autocorrel_alpha=mcbs_acalpha,
        subsample=numpy.mean,
    )
    interval = mcbs_ci_correl(state_pops, **mcbs_settings)
    labelled = (start, stop) + interval
    return (iblock, istate, labelled)
def _eval_block(iblock, start, stop, nstates, total_fluxes, cond_fluxes, rates, mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    """Evaluate ``mcbs_ci_correl`` on one block of flux and rate data.

    Per state ``i`` the estimate covers ``total_fluxes[:, i]``; per ordered
    pair ``i != j`` it covers ``cond_fluxes[:, i, j]`` and ``rates[:, i, j]``.
    All calls use ``numpy.mean`` as estimator and subsampler.

    Returns ``[target, conditional, rate]`` where ``target`` holds
    ``(iblock, istate, (start, stop) + ci_res)`` tuples and the other two hold
    ``(iblock, istate, jstate, (start, stop) + ci_res)`` tuples. ``ci_res`` is
    the tuple returned by ``mcbs_ci_correl``.
    """
    # Settings shared by every mcbs_ci_correl call in this block.
    mcbs_settings = dict(estimator=numpy.mean, alpha=mcbs_alpha, n_sets=mcbs_nsets,
                         autocorrel_alpha=mcbs_acalpha, subsample=numpy.mean)

    results = [[], [], []]
    # results are target fluxes, conditional fluxes, rates

    # ``xrange`` does not exist on Python 3; ``range`` works on both versions.
    for istate in range(nstates):
        ci_res = mcbs_ci_correl(total_fluxes[:, istate], **mcbs_settings)
        results[0].append((iblock, istate, (start, stop) + ci_res))

        for jstate in range(nstates):
            if istate == jstate:
                continue
            ci_res = mcbs_ci_correl(cond_fluxes[:, istate, jstate], **mcbs_settings)
            results[1].append((iblock, istate, jstate, (start, stop) + ci_res))

            ci_res = mcbs_ci_correl(rates[:, istate, jstate], **mcbs_settings)
            results[2].append((iblock, istate, jstate, (start, stop) + ci_res))
    return results
def calc_evol_flux(self):
    """Compute and store the cumulative evolution of flux confidence intervals.

    For each target in ``self.fluxdata`` the flux series is analysed over
    growing prefixes ``flux[:istop]`` with ``mclib.mcbs_ci_correl`` (the series
    is passed as ``{'dataset': ...}`` and reduced with ``numpy.mean``). One
    record per block is stored in a ``flux_evolution`` dataset inside the
    target's HDF5 group, with the MCBS settings attached as attributes.
    """
    def _mean_of_dataset(stride, dataset):
        # Estimator handed to mcbs_ci_correl; only ``dataset`` is used.
        return numpy.mean(dataset)

    step = self.evol_step
    westpa.rc.pstatus(
        'Calculating cumulative evolution of flux confidence intervals every {} iteration(s)'.format(step))

    for target_label, target_fluxdata in self.fluxdata.items():
        flux_series = target_fluxdata['flux']
        target_group = self.target_groups[target_label]

        iter_start = target_group['n_iter'][0]
        iter_stop = target_group['n_iter'][-1]

        # Round the block count up so a trailing partial block is included.
        n_blocks, leftover = divmod(iter_stop - iter_start, step)
        if leftover > 0:
            n_blocks += 1

        cis = numpy.empty((n_blocks,), dtype=ci_dtype)

        for iblock in range(n_blocks):
            upto = (iblock + 1) * step
            block_iter_stop = min(iter_start + upto, iter_stop)
            istop = min(upto, len(target_fluxdata['flux']))
            flux_series = target_fluxdata['flux'][:istop]

            avg, ci_lb, ci_ub, sterr, correl_len = mclib.mcbs_ci_correl(
                {'dataset': flux_series},
                estimator=_mean_of_dataset,
                alpha=self.alpha,
                n_sets=self.n_sets,
                autocorrel_alpha=self.autocorrel_alpha,
                subsample=numpy.mean,
                do_correl=self.do_correl,
                mcbs_enable=self.mcbs_enable)

            cis['iter_start'][iblock] = iter_start
            cis['iter_stop'][iblock] = block_iter_stop
            cis['expected'][iblock] = avg
            cis['ci_lbound'][iblock] = ci_lb
            cis['ci_ubound'][iblock] = ci_ub
            cis['corr_len'][iblock] = correl_len
            cis['sterr'][iblock] = sterr

        del flux_series

        cis_ds = target_group.create_dataset('flux_evolution', data=cis)
        for attr_name, attr_value in (('iter_step', self.evol_step),
                                      ('mcbs_alpha', self.alpha),
                                      ('mcbs_autocorrel_alpha', self.autocorrel_alpha),
                                      ('mcbs_n_sets', self.n_sets)):
            cis_ds.attrs[attr_name] = attr_value
def _2D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                   mcbs_enable, estimator_kwargs):
    """Evaluate the reweighting estimator for every ordered state pair ``i != j``.

    Each pair gets one ``mcbs_ci_correl`` call with ``reweight_for_c`` as the
    estimator and a dataset holding the indices ``start - 1 .. stop - 2``
    (as ``uint16``).

    ``estimator_kwargs`` provides the extra estimator keyword arguments; the
    per-pair ``istate``, ``jstate`` and ``nstates`` entries are added on a
    copy, leaving the caller's dictionary unchanged. ``data_input`` is not
    referenced in this function.

    Returns a list of ``(name, iblock, istate, jstate, (start, stop) + ci_res)``
    tuples, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``.
    """
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    # ``range`` instead of ``xrange`` keeps this runnable on Python 3.
    for istate in range(nstates):
        for jstate in range(nstates):
            if istate == jstate:
                continue
            # Copy rather than ``estimator_kwargs.update(...)`` so the caller's
            # (possibly shared) dict is never mutated.
            pair_kwargs = dict(estimator_kwargs, istate=istate, jstate=jstate, nstates=nstates)
            # NOTE(review): uint16 cannot represent indices above 65535 (or a
            # negative start - 1); confirm the dtype the estimator expects
            # before widening it.
            dataset = {'indices': np.array(range(start - 1, stop - 1), dtype=np.uint16)}
            ci_res = mcbs_ci_correl(dataset, estimator=reweight_for_c,
                                    alpha=mcbs_alpha, n_sets=mcbs_nsets, autocorrel_alpha=mcbs_acalpha,
                                    subsample=(lambda x: x[0]), do_correl=do_correl, mcbs_enable=mcbs_enable,
                                    estimator_kwargs=pair_kwargs)
            results.append((name, iblock, istate, jstate, (start, stop) + ci_res))
    return results
def _1D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                   mcbs_enable, estimator_kwargs):
    """Evaluate the reweighting estimator once per state.

    Each state gets one ``mcbs_ci_correl`` call with ``reweight_for_c`` as the
    estimator and a dataset holding the indices ``start - 1 .. stop - 2``
    (as ``uint16``).

    ``estimator_kwargs`` provides the extra estimator keyword arguments; the
    per-state ``istate``, ``jstate`` and ``nstates`` entries are added on a
    copy, leaving the caller's dictionary unchanged. ``data_input`` is not
    referenced in this function.

    Returns a list of ``(name, iblock, istate, (start, stop) + ci_res)``
    tuples, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``.
    """
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    # ``range`` instead of ``xrange`` keeps this runnable on Python 3.
    for istate in range(nstates):
        # A little hack to make our estimator play nice, as jstate must be there.
        # For 1D datasets (state probabilities, etc), the argument isn't used in our estimator,
        # and so any variable which has the proper type is fine.
        #
        # Built as a copy rather than ``estimator_kwargs.update(...)`` so the
        # caller's (possibly shared) dict is never mutated.
        state_kwargs = dict(estimator_kwargs, istate=istate, jstate=istate, nstates=nstates)
        # NOTE(review): uint16 cannot represent indices above 65535 (or a
        # negative start - 1); confirm the dtype the estimator expects before
        # widening it.
        dataset = {'indices': np.array(range(start - 1, stop - 1), dtype=np.uint16)}
        ci_res = mcbs_ci_correl(dataset, estimator=reweight_for_c,
                                alpha=mcbs_alpha, n_sets=mcbs_nsets, autocorrel_alpha=mcbs_acalpha,
                                subsample=(lambda x: x[0]), do_correl=do_correl, mcbs_enable=mcbs_enable,
                                estimator_kwargs=state_kwargs)
        results.append((name, iblock, istate, (start, stop) + ci_res))
    return results
def _rate_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                     mcbs_enable):
    """Evaluate the rate estimator for every ordered state pair ``i != j``.

    For each pair, ``mcbs_ci_correl`` is called with
    ``sequence_macro_flux_to_rate`` as estimator on a dataset consisting of
    ``data_input['dataset'][:, i, j]`` and the complete ``data_input['pops']``;
    ``istate``/``jstate`` are forwarded as estimator keyword arguments.

    Returns a list of ``(name, iblock, istate, jstate, (start, stop) + ci_res)``
    tuples, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``.
    """
    # Our rate estimator is a little more complex, so we've defined a custom evaluation block for it,
    # instead of just using the block evaluators that we've imported.
    results = []
    # ``range`` instead of ``xrange`` keeps this runnable on Python 3.
    for istate in range(nstates):
        for jstate in range(nstates):
            if istate == jstate:
                continue
            kwargs = {'istate': istate, 'jstate': jstate}
            # Why are we sending in the total population dataset, instead of a sliced one?
            # It's a requirement of our estimator; we need to pull from any given i to j state in order to
            # properly normalize and avoid i to j rate constants which are affected by a third state k.
            # That is, we need the populations for both i and j, and it's easier to just send in the
            # entire dataset.
            dataset = {'dataset': data_input['dataset'][:, istate, jstate], 'pops': data_input['pops']}
            ci_res = mcbs_ci_correl(dataset, estimator=sequence_macro_flux_to_rate,
                                    alpha=mcbs_alpha, n_sets=mcbs_nsets, autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean, do_correl=do_correl, mcbs_enable=mcbs_enable,
                                    estimator_kwargs=kwargs)
            results.append((name, iblock, istate, jstate, (start, stop) + ci_res))
    return results
def go(self):
    """Average fluxes and rates over the iteration range and, optionally, their evolution.

    Steps, all performed under the progress indicator:

    1. Read ``conditional_fluxes`` and ``total_fluxes`` from
       ``self.kinetics_file`` and ``labeled_populations`` from
       ``self.assignments_file``; derive rates with
       ``sequence_macro_flux_to_rate``.
    2. Run ``mcbs_ci_correl`` (``numpy.mean`` estimator) over
       ``[start_iter, stop_iter)`` for every state (total flux) and every
       ordered state pair (conditional flux and rate).
    3. Store ``avg_rates``, ``avg_conditional_fluxes``, ``avg_total_fluxes``
       and ``state_labels`` in ``self.output_file`` and print a summary.
    4. Unless ``self.evolution_mode == 'none'`` or the iteration step is
       falsy, submit one ``_eval_block`` task per block to
       ``self.work_manager`` and store the results as
       ``conditional_flux_evolution``, ``target_flux_evolution`` and
       ``rate_evolution``.
    """
    pi = self.progress.indicator
    with pi:
        pi.new_operation('Initializing')
        self.open_files()
        nstates = self.assignments_file.attrs['nstates']
        nbins = self.assignments_file.attrs['nbins']
        state_labels = self.assignments_file['state_labels'][...]
        assert nstates == len(state_labels)
        start_iter = self.iter_range.iter_start
        stop_iter = self.iter_range.iter_stop
        step_iter = self.iter_range.iter_step

        pi.new_operation('Reading data')
        cond_fluxes = h5io.IterBlockedDataset(self.kinetics_file['conditional_fluxes'])
        cond_fluxes.cache_data()
        total_fluxes = h5io.IterBlockedDataset(self.kinetics_file['total_fluxes'])
        pops = h5io.IterBlockedDataset(self.assignments_file['labeled_populations'])
        pops.cache_data()
        pops.data = pops.data.sum(axis=2)

        rates = h5io.IterBlockedDataset.empty_like(cond_fluxes)
        rates.data = sequence_macro_flux_to_rate(cond_fluxes.data, pops.data[:nstates, :nbins])

        avg_total_fluxes = numpy.zeros((nstates,), dtype=ci_dtype)
        avg_conditional_fluxes = numpy.zeros((nstates, nstates), dtype=ci_dtype)
        avg_rates = numpy.zeros((nstates, nstates), dtype=ci_dtype)

        # Settings shared by every mcbs_ci_correl call below.
        mcbs_settings = dict(estimator=numpy.mean, alpha=self.mcbs_alpha, n_sets=self.mcbs_nsets,
                             autocorrel_alpha=self.mcbs_acalpha, subsample=numpy.mean)

        # Calculate overall average rates
        # (``range`` instead of ``xrange`` throughout, for Python 3 compatibility.)
        pi.new_operation('Averaging overall fluxes into states', nstates)
        for istate in range(nstates):
            ci_res = mcbs_ci_correl(total_fluxes.iter_slice(start_iter, stop_iter)[:, istate], **mcbs_settings)
            avg_total_fluxes[istate] = (start_iter, stop_iter) + ci_res
            pi.progress += 1

        pi.new_operation('Averaging state-to-state fluxes and rates', nstates * (nstates - 1))
        for istate in range(nstates):
            for jstate in range(nstates):
                if istate == jstate:
                    continue
                flux_ci_res = mcbs_ci_correl(cond_fluxes.iter_slice(start_iter, stop_iter)[:, istate, jstate],
                                             **mcbs_settings)
                rate_ci_res = mcbs_ci_correl(rates.iter_slice(start_iter, stop_iter)[:, istate, jstate],
                                             **mcbs_settings)
                avg_conditional_fluxes[istate, jstate] = (start_iter, stop_iter) + flux_ci_res
                avg_rates[istate, jstate] = (start_iter, stop_iter) + rate_ci_res
                pi.progress += 1

        pi.new_operation('Saving averages')
        self.output_file['avg_rates'] = avg_rates
        self.output_file['avg_conditional_fluxes'] = avg_conditional_fluxes
        self.output_file['avg_total_fluxes'] = avg_total_fluxes

        for ds in ('avg_rates', 'avg_conditional_fluxes', 'avg_total_fluxes'):
            self.stamp_mcbs_info(self.output_file[ds])

        self.output_file['state_labels'] = state_labels
        maxlabellen = max(map(len, state_labels))
        pi.clear()

        print('fluxes into macrostates:')
        for istate in range(nstates):
            print('{:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                  .format(state_labels[istate],
                          avg_total_fluxes['expected'][istate],
                          avg_total_fluxes['ci_lbound'][istate],
                          avg_total_fluxes['ci_ubound'][istate],
                          maxlabellen=maxlabellen))

        print('\nfluxes from state to state:')
        for istate in range(nstates):
            for jstate in range(nstates):
                if istate == jstate:
                    continue
                print('{:{maxlabellen}s} -> {:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                      .format(state_labels[istate], state_labels[jstate],
                              avg_conditional_fluxes['expected'][istate, jstate],
                              avg_conditional_fluxes['ci_lbound'][istate, jstate],
                              avg_conditional_fluxes['ci_ubound'][istate, jstate],
                              maxlabellen=maxlabellen))

        print('\nrates from state to state:')
        for istate in range(nstates):
            for jstate in range(nstates):
                if istate == jstate:
                    continue
                print('{:{maxlabellen}s} -> {:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                      .format(state_labels[istate], state_labels[jstate],
                              avg_rates['expected'][istate, jstate],
                              avg_rates['ci_lbound'][istate, jstate],
                              avg_rates['ci_ubound'][istate, jstate],
                              maxlabellen=maxlabellen))

        # skip evolution if not requested
        if self.evolution_mode == 'none' or not step_iter:
            return

        start_pts = range(start_iter, stop_iter, step_iter)
        target_evol = numpy.zeros((len(start_pts), nstates), dtype=ci_dtype)
        flux_evol = numpy.zeros((len(start_pts), nstates, nstates), dtype=ci_dtype)
        rate_evol = numpy.zeros((len(start_pts), nstates, nstates), dtype=ci_dtype)

        pi.new_operation('Calculating flux/rate evolution', len(start_pts))
        futures = []
        for iblock, start in enumerate(start_pts):
            stop = min(start + step_iter, stop_iter)
            if self.evolution_mode == 'cumulative':
                # Window is a fraction of everything seen so far, clamped to start_iter.
                windowsize = int(self.evol_window_frac * (stop - start_iter))
                block_start = max(start_iter, stop - windowsize)
            else:  # self.evolution_mode == 'blocked'
                block_start = start

            future = self.work_manager.submit(
                _eval_block,
                kwargs=dict(iblock=iblock, start=block_start, stop=stop, nstates=nstates,
                            total_fluxes=total_fluxes.iter_slice(block_start, stop),
                            cond_fluxes=cond_fluxes.iter_slice(block_start, stop),
                            rates=rates.iter_slice(block_start, stop),
                            mcbs_alpha=self.mcbs_alpha, mcbs_nsets=self.mcbs_nsets,
                            mcbs_acalpha=self.mcbs_acalpha))
            futures.append(future)

        # Results arrive in completion order; each carries its own block index.
        for future in self.work_manager.as_completed(futures):
            pi.progress += 1
            target_results, condflux_results, rate_results = future.get_result(discard=True)
            for result in target_results:
                iblock, istate, ci_result = result
                target_evol[iblock, istate] = ci_result

            for result in condflux_results:
                iblock, istate, jstate, ci_result = result
                flux_evol[iblock, istate, jstate] = ci_result

            for result in rate_results:
                iblock, istate, jstate, ci_result = result
                rate_evol[iblock, istate, jstate] = ci_result

        df_ds = self.output_file.create_dataset('conditional_flux_evolution', data=flux_evol,
                                                shuffle=True, compression=9)
        tf_ds = self.output_file.create_dataset('target_flux_evolution', data=target_evol,
                                                shuffle=True, compression=9)
        rate_ds = self.output_file.create_dataset('rate_evolution', data=rate_evol,
                                                  shuffle=True, compression=9)

        for ds in (df_ds, tf_ds, rate_ds):
            self.stamp_mcbs_info(ds)
def calc_store_flux_data(self):
    """Extract per-target fluxes, then store them with autocorrelation and averages.

    Fluxes for ``[iter_start, iter_stop)`` come from ``extract_fluxes``. A
    ``target_flux`` group is created in ``self.output_h5file``; for each
    target a ``target_<n>`` subgroup receives the per-iteration ``n_iter``,
    ``count`` and ``flux`` values plus a normalized ``flux_autocorrel``
    dataset. The mean flux and its confidence interval come from
    ``mclib.mcbs_ci_correl`` and are recorded in the ``index`` dataset and in
    ``avg_flux``. ``self.output_group``, ``self.target_groups`` and
    ``self.fluxdata`` are updated along the way.
    """
    def _mean_of_dataset(stride, dataset):
        # Estimator handed to mcbs_ci_correl; only ``dataset`` is used.
        return numpy.mean(dataset)

    first_iter = self.iter_range.iter_start
    last_iter = self.iter_range.iter_stop
    westpa.rc.pstatus(
        'Calculating mean flux and confidence intervals for iterations [{},{})'.format(first_iter, last_iter))

    fluxdata = extract_fluxes(self.iter_range.iter_start, self.iter_range.iter_stop, self.data_reader)

    # Group that holds everything written by this method
    output_group = h5io.create_hdf5_group(self.output_h5file,
                                          'target_flux',
                                          replace=False,
                                          creating_program=self.prog)
    self.output_group = output_group
    output_group.attrs['version_code'] = self.output_format_version
    self.iter_range.record_data_iter_range(output_group)

    n_targets = len(fluxdata)
    index = numpy.empty((n_targets,), dtype=target_index_dtype)
    avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

    for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
        # Group and index entry for this target
        index[itarget]['target_label'] = str(target_label)
        target_group = output_group.create_group('target_{}'.format(itarget))
        self.target_groups[target_label] = target_group

        # Per-iteration values
        for field in ('n_iter', 'count', 'flux'):
            target_group[field] = target_fluxdata[field]
        h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

        # Flux autocorrelation: convolve the mean-subtracted series with its
        # reverse, keep the second half, and normalize by the first kept value.
        fluxes = target_fluxdata['flux']
        fmm = fluxes - fluxes.mean()
        full_corr = fftconvolve(fmm, fmm[::-1])
        acorr = full_corr[len(full_corr) // 2:]
        acorr /= acorr[0]
        acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
        h5io.label_axes(acorr_ds, ['lag'], ['tau'])

        # Overall average and confidence interval
        avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
            {'dataset': fluxes},
            estimator=_mean_of_dataset,
            alpha=self.alpha,
            n_sets=self.n_sets,
            autocorrel_alpha=self.autocorrel_alpha,
            subsample=numpy.mean,
            do_correl=self.do_correl,
            mcbs_enable=self.mcbs_enable)
        avg_fluxdata[itarget] = (self.iter_range.iter_start, self.iter_range.iter_stop,
                                 avg, lb_ci, ub_ci, sterr, correl_len)

        westpa.rc.pstatus('target {!r}:'.format(target_label))
        westpa.rc.pstatus(' correlation length = {} tau'.format(correl_len))
        westpa.rc.pstatus(' mean flux and CI = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))

        entry = index[itarget]
        entry['mean_flux'] = avg
        entry['mean_flux_ci_lb'] = lb_ci
        entry['mean_flux_ci_ub'] = ub_ci
        entry['mean_flux_correl_len'] = correl_len

    # Index and summary
    index_ds = output_group.create_dataset('index', data=index)
    index_ds.attrs['mcbs_alpha'] = self.alpha
    index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
    index_ds.attrs['mcbs_n_sets'] = self.n_sets

    self.fluxdata = fluxdata
    self.output_h5file['avg_flux'] = avg_fluxdata
def _eval_block(iblock, istate, start, stop, state_pops, mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    """Run ``mcbs_ci_correl`` on ``state_pops`` for one block and one state.

    ``numpy.mean`` serves as both estimator and subsampler. The return value is
    ``(iblock, istate, (start, stop) + ci_res)``, where ``ci_res`` is the tuple
    produced by ``mcbs_ci_correl``; the leading arguments only label the
    result.
    """
    window = (start, stop)
    ci_tuple = mcbs_ci_correl(
        state_pops,
        estimator=numpy.mean,
        alpha=mcbs_alpha,
        n_sets=mcbs_nsets,
        autocorrel_alpha=mcbs_acalpha,
        subsample=numpy.mean,
    )
    return (iblock, istate, window + ci_tuple)
def go(self):
    """Average fluxes and rates over the iteration range and, optionally, their evolution.

    Everything runs under the progress indicator:

    1. ``conditional_fluxes`` and ``total_fluxes`` are read from
       ``self.kinetics_file`` and ``labeled_populations`` from
       ``self.assignments_file``; rates are derived with
       ``sequence_macro_flux_to_rate``.
    2. ``mcbs_ci_correl`` (``numpy.mean`` estimator) is evaluated over
       ``[start_iter, stop_iter)`` for every state (total flux) and every
       ordered state pair (conditional flux and rate).
    3. ``avg_rates``, ``avg_conditional_fluxes``, ``avg_total_fluxes`` and
       ``state_labels`` are written to ``self.output_file`` and a summary is
       printed.
    4. Unless ``self.evolution_mode == 'none'`` or the iteration step is
       falsy, one ``_eval_block`` task per block is submitted to
       ``self.work_manager`` and the results are stored as
       ``conditional_flux_evolution``, ``target_flux_evolution`` and
       ``rate_evolution``.
    """
    pi = self.progress.indicator
    with pi:
        pi.new_operation('Initializing')
        self.open_files()
        nstates = self.assignments_file.attrs['nstates']
        nbins = self.assignments_file.attrs['nbins']
        state_labels = self.assignments_file['state_labels'][...]
        assert nstates == len(state_labels)
        start_iter, stop_iter, step_iter = (self.iter_range.iter_start,
                                            self.iter_range.iter_stop,
                                            self.iter_range.iter_step)

        pi.new_operation('Reading data')
        cond_fluxes = h5io.IterBlockedDataset(self.kinetics_file['conditional_fluxes'])
        cond_fluxes.cache_data()
        total_fluxes = h5io.IterBlockedDataset(self.kinetics_file['total_fluxes'])
        pops = h5io.IterBlockedDataset(self.assignments_file['labeled_populations'])
        pops.cache_data()
        pops.data = pops.data.sum(axis=2)

        rates = h5io.IterBlockedDataset.empty_like(cond_fluxes)
        rates.data = sequence_macro_flux_to_rate(cond_fluxes.data, pops.data[:nstates, :nbins])

        avg_total_fluxes = numpy.zeros((nstates,), dtype=ci_dtype)
        avg_conditional_fluxes = numpy.zeros((nstates, nstates), dtype=ci_dtype)
        avg_rates = numpy.zeros((nstates, nstates), dtype=ci_dtype)

        # Calculate overall average rates.
        # NOTE: ``range`` replaces ``xrange`` everywhere in this method so it
        # runs on Python 3 as well as Python 2.
        pi.new_operation('Averaging overall fluxes into states', nstates)
        for istate in range(nstates):
            ci_res = mcbs_ci_correl(total_fluxes.iter_slice(start_iter, stop_iter)[:, istate],
                                    estimator=numpy.mean,
                                    alpha=self.mcbs_alpha,
                                    n_sets=self.mcbs_nsets,
                                    autocorrel_alpha=self.mcbs_acalpha,
                                    subsample=numpy.mean)
            avg_total_fluxes[istate] = (start_iter, stop_iter) + ci_res
            pi.progress += 1

        pi.new_operation('Averaging state-to-state fluxes and rates', nstates * (nstates - 1))
        for istate in range(nstates):
            for jstate in range(nstates):
                if istate == jstate:
                    continue
                flux_ci_res = mcbs_ci_correl(
                    cond_fluxes.iter_slice(start_iter, stop_iter)[:, istate, jstate],
                    estimator=numpy.mean,
                    alpha=self.mcbs_alpha,
                    n_sets=self.mcbs_nsets,
                    autocorrel_alpha=self.mcbs_acalpha,
                    subsample=numpy.mean)
                rate_ci_res = mcbs_ci_correl(
                    rates.iter_slice(start_iter, stop_iter)[:, istate, jstate],
                    estimator=numpy.mean,
                    alpha=self.mcbs_alpha,
                    n_sets=self.mcbs_nsets,
                    autocorrel_alpha=self.mcbs_acalpha,
                    subsample=numpy.mean)
                avg_conditional_fluxes[istate, jstate] = (start_iter, stop_iter) + flux_ci_res
                avg_rates[istate, jstate] = (start_iter, stop_iter) + rate_ci_res
                pi.progress += 1

        pi.new_operation('Saving averages')
        self.output_file['avg_rates'] = avg_rates
        self.output_file['avg_conditional_fluxes'] = avg_conditional_fluxes
        self.output_file['avg_total_fluxes'] = avg_total_fluxes

        for ds in ('avg_rates', 'avg_conditional_fluxes', 'avg_total_fluxes'):
            self.stamp_mcbs_info(self.output_file[ds])

        self.output_file['state_labels'] = state_labels
        maxlabellen = max(map(len, state_labels))
        pi.clear()

        print('fluxes into macrostates:')
        for istate in range(nstates):
            print('{:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                  .format(state_labels[istate],
                          avg_total_fluxes['expected'][istate],
                          avg_total_fluxes['ci_lbound'][istate],
                          avg_total_fluxes['ci_ubound'][istate],
                          maxlabellen=maxlabellen))

        print('\nfluxes from state to state:')
        for istate in range(nstates):
            for jstate in range(nstates):
                if istate == jstate:
                    continue
                print('{:{maxlabellen}s} -> {:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                      .format(state_labels[istate], state_labels[jstate],
                              avg_conditional_fluxes['expected'][istate, jstate],
                              avg_conditional_fluxes['ci_lbound'][istate, jstate],
                              avg_conditional_fluxes['ci_ubound'][istate, jstate],
                              maxlabellen=maxlabellen))

        print('\nrates from state to state:')
        for istate in range(nstates):
            for jstate in range(nstates):
                if istate == jstate:
                    continue
                print('{:{maxlabellen}s} -> {:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                      .format(state_labels[istate], state_labels[jstate],
                              avg_rates['expected'][istate, jstate],
                              avg_rates['ci_lbound'][istate, jstate],
                              avg_rates['ci_ubound'][istate, jstate],
                              maxlabellen=maxlabellen))

        # skip evolution if not requested
        if self.evolution_mode == 'none' or not step_iter:
            return

        start_pts = range(start_iter, stop_iter, step_iter)
        n_blocks = len(start_pts)
        target_evol = numpy.zeros((n_blocks, nstates), dtype=ci_dtype)
        flux_evol = numpy.zeros((n_blocks, nstates, nstates), dtype=ci_dtype)
        rate_evol = numpy.zeros((n_blocks, nstates, nstates), dtype=ci_dtype)

        pi.new_operation('Calculating flux/rate evolution', n_blocks)
        futures = []
        for iblock, start in enumerate(start_pts):
            stop = min(start + step_iter, stop_iter)
            if self.evolution_mode == 'cumulative':
                # Window is a fraction of everything seen so far, clamped to start_iter.
                windowsize = int(self.evol_window_frac * (stop - start_iter))
                block_start = max(start_iter, stop - windowsize)
            else:  # self.evolution_mode == 'blocked'
                block_start = start

            future = self.work_manager.submit(
                _eval_block,
                kwargs=dict(iblock=iblock,
                            start=block_start,
                            stop=stop,
                            nstates=nstates,
                            total_fluxes=total_fluxes.iter_slice(block_start, stop),
                            cond_fluxes=cond_fluxes.iter_slice(block_start, stop),
                            rates=rates.iter_slice(block_start, stop),
                            mcbs_alpha=self.mcbs_alpha,
                            mcbs_nsets=self.mcbs_nsets,
                            mcbs_acalpha=self.mcbs_acalpha))
            futures.append(future)

        # Results arrive in completion order; each carries its own block index.
        for future in self.work_manager.as_completed(futures):
            pi.progress += 1
            target_results, condflux_results, rate_results = future.get_result(discard=True)
            for result in target_results:
                iblock, istate, ci_result = result
                target_evol[iblock, istate] = ci_result

            for result in condflux_results:
                iblock, istate, jstate, ci_result = result
                flux_evol[iblock, istate, jstate] = ci_result

            for result in rate_results:
                iblock, istate, jstate, ci_result = result
                rate_evol[iblock, istate, jstate] = ci_result

        df_ds = self.output_file.create_dataset('conditional_flux_evolution',
                                                data=flux_evol,
                                                shuffle=True,
                                                compression=9)
        tf_ds = self.output_file.create_dataset('target_flux_evolution',
                                                data=target_evol,
                                                shuffle=True,
                                                compression=9)
        rate_ds = self.output_file.create_dataset('rate_evolution',
                                                  data=rate_evol,
                                                  shuffle=True,
                                                  compression=9)

        for ds in (df_ds, tf_ds, rate_ds):
            self.stamp_mcbs_info(ds)
numpy.mean, 0.05, n_sets=n_sets, sort=numpy.msort) n_uncorrel_width = uncorrel_ub - uncorrel_lb n_correl_width = correl_ub - correl_lb print(' uncorrelated: {} ({},{})'.format(uncorrel_mean, uncorrel_lb, uncorrel_ub)) print(' correlated: {} ({},{})'.format(correl_mean, correl_lb, correl_ub)) print(' width ratio c/u: {}'.format((n_correl_width / n_uncorrel_width))) print('blocked MCBS:') #uncorrel_mean, uncorrel_lb, uncorrel_ub = mcbs_ci(ds_uncorrel[::k_uncorrel+1], numpy.mean, 0.05, n_sets=1000, sort=numpy.msort) #correl_mean, correl_lb, correl_ub = mcbs_ci(ds_correl[::k_correl+1], numpy.mean, 0.05, n_sets=1000, sort=numpy.msort) uncorrel_mean, uncorrel_lb, uncorrel_ub, k_ = mcbs_ci_correl( ds_uncorrel, numpy.mean, 0.05, n_sets=n_sets, subsample=numpy.mean) correl_mean, correl_lb, correl_ub, k_ = mcbs_ci_correl(ds_correl, numpy.mean, 0.05, n_sets=n_sets, subsample=numpy.mean) b_uncorrel_width = uncorrel_ub - uncorrel_lb b_correl_width = correl_ub - correl_lb print(' uncorrelated: {} ({},{})'.format(uncorrel_mean, uncorrel_lb, uncorrel_ub)) print(' correlated: {} ({},{})'.format(correl_mean, correl_lb, correl_ub)) print(' width ratio c/u: {}'.format((b_correl_width / b_uncorrel_width))) print('width ratio blocked/naive:') print(' uncorrelated: {}'.format(b_uncorrel_width / n_uncorrel_width))
def calc_store_flux_data(self):
    """Extract per-target fluxes, then store them with autocorrelation and averages.

    Fluxes for ``[iter_start, iter_stop)`` are obtained from
    ``extract_fluxes``. A ``target_flux`` group is created in
    ``self.output_h5file``; for each target a ``target_<n>`` subgroup receives
    the per-iteration ``n_iter``, ``count`` and ``flux`` values and a
    normalized ``flux_autocorrel`` dataset. The mean flux and its confidence
    interval are computed with ``mclib.mcbs_ci_correl`` and recorded in the
    ``index`` dataset and in ``avg_flux``. ``self.output_group``,
    ``self.target_groups`` and ``self.fluxdata`` are set as side effects.
    """
    westpa.rc.pstatus('Calculating mean flux and confidence intervals for iterations [{},{})'
                      .format(self.iter_range.iter_start, self.iter_range.iter_stop))

    fluxdata = extract_fluxes(self.iter_range.iter_start, self.iter_range.iter_stop, self.data_reader)

    # Create a group to store data in
    output_group = h5io.create_hdf5_group(self.output_h5file, 'target_flux', replace=False,
                                          creating_program=self.prog)
    self.output_group = output_group
    output_group.attrs['version_code'] = self.output_format_version
    self.iter_range.record_data_iter_range(output_group)

    n_targets = len(fluxdata)
    index = numpy.empty((n_targets,), dtype=target_index_dtype)
    avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

    # ``items()`` instead of ``iteritems()`` so this also runs on Python 3.
    for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
        # Create group and index entry
        index[itarget]['target_label'] = str(target_label)
        target_group = output_group.create_group('target_{}'.format(itarget))
        self.target_groups[target_label] = target_group

        # Store per-iteration values
        target_group['n_iter'] = target_fluxdata['n_iter']
        target_group['count'] = target_fluxdata['count']
        target_group['flux'] = target_fluxdata['flux']
        h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

        # Calculate flux autocorrelation: convolve the mean-subtracted series
        # with its reverse, keep the second half, normalize by its first value.
        fluxes = target_fluxdata['flux']
        mean_flux = fluxes.mean()
        fmm = fluxes - mean_flux
        acorr = fftconvolve(fmm, fmm[::-1])
        acorr = acorr[len(acorr) // 2:]
        acorr /= acorr[0]
        acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
        h5io.label_axes(acorr_ds, ['lag'], ['tau'])

        # Calculate overall averages and CIs
        avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
            {'dataset': fluxes},
            estimator=(lambda stride, dataset: numpy.mean(dataset)),
            alpha=self.alpha,
            n_sets=self.n_sets,
            autocorrel_alpha=self.autocorrel_alpha,
            subsample=numpy.mean,
            do_correl=self.do_correl,
            mcbs_enable=self.mcbs_enable)
        avg_fluxdata[itarget] = (self.iter_range.iter_start, self.iter_range.iter_stop,
                                 avg, lb_ci, ub_ci, sterr, correl_len)
        westpa.rc.pstatus('target {!r}:'.format(target_label))
        westpa.rc.pstatus(' correlation length = {} tau'.format(correl_len))
        westpa.rc.pstatus(' mean flux and CI = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))
        index[itarget]['mean_flux'] = avg
        index[itarget]['mean_flux_ci_lb'] = lb_ci
        index[itarget]['mean_flux_ci_ub'] = ub_ci
        index[itarget]['mean_flux_correl_len'] = correl_len

    # Write index and summary
    index_ds = output_group.create_dataset('index', data=index)
    index_ds.attrs['mcbs_alpha'] = self.alpha
    index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
    index_ds.attrs['mcbs_n_sets'] = self.n_sets

    self.fluxdata = fluxdata
    self.output_h5file['avg_flux'] = avg_fluxdata