def calc_store_flux_data(self):
    """Compute mean target fluxes with confidence intervals and store them.

    Fluxes for the range given by ``self.iter_range`` are obtained from
    ``extract_fluxes``. For each target, a ``target_<i>`` subgroup of the
    ``target_flux`` output group receives the per-iteration ``n_iter``, ``count``
    and ``flux`` data plus the flux autocorrelation divided by its first (zero-lag)
    element. The values returned by ``mclib.mcbs_ci_correl``
    (``avg, lb_ci, ub_ci, sterr, correl_len``) are reported through
    ``westpa.rc.pstatus``, stored per target in ``avg_flux`` of
    ``self.output_h5file``, and (all but ``sterr``) in the group's ``index`` dataset.

    Also sets ``self.output_group`` and ``self.fluxdata`` and adds one entry per
    target to ``self.target_groups``.
    """

    def mean_estimator(stride, dataset):
        # `stride` is part of the estimator call signature but is not needed for a mean.
        return numpy.mean(dataset)

    span = self.iter_range
    westpa.rc.pstatus(
        'Calculating mean flux and confidence intervals for iterations [{},{})'
        .format(span.iter_start, span.iter_stop))

    fluxdata = extract_fluxes(span.iter_start, span.iter_stop, self.data_reader)

    # Group that receives everything this method writes (except 'avg_flux').
    group = h5io.create_hdf5_group(self.output_h5file, 'target_flux',
                                   replace=False, creating_program=self.prog)
    self.output_group = group
    group.attrs['version_code'] = self.output_format_version
    span.record_data_iter_range(group)

    n_targets = len(fluxdata)
    index = numpy.empty((n_targets,), dtype=target_index_dtype)
    avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

    for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
        # Index entry and per-target group
        index[itarget]['target_label'] = str(target_label)
        target_group = group.create_group('target_{}'.format(itarget))
        self.target_groups[target_label] = target_group

        # Per-iteration values
        for field in ('n_iter', 'count', 'flux'):
            target_group[field] = target_fluxdata[field]
        h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

        # Flux autocorrelation: correlate the mean-subtracted series with itself,
        # keep the second half of the result, and scale by its first element.
        fluxes = target_fluxdata['flux']
        deviations = fluxes - fluxes.mean()
        full_corr = fftconvolve(deviations, deviations[::-1])
        acorr = full_corr[len(full_corr) // 2:]
        acorr /= acorr[0]
        acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
        h5io.label_axes(acorr_ds, ['lag'], ['tau'])

        # Overall average and confidence interval
        avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
            {'dataset': fluxes},
            estimator=mean_estimator,
            alpha=self.alpha,
            n_sets=self.n_sets,
            autocorrel_alpha=self.autocorrel_alpha,
            subsample=numpy.mean,
            do_correl=self.do_correl,
            mcbs_enable=self.mcbs_enable)
        avg_fluxdata[itarget] = (span.iter_start, span.iter_stop,
                                 avg, lb_ci, ub_ci, sterr, correl_len)

        westpa.rc.pstatus('target {!r}:'.format(target_label))
        westpa.rc.pstatus(' correlation length = {} tau'.format(correl_len))
        westpa.rc.pstatus(' mean flux and CI = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))

        summary = (
            ('mean_flux', avg),
            ('mean_flux_ci_lb', lb_ci),
            ('mean_flux_ci_ub', ub_ci),
            ('mean_flux_correl_len', correl_len),
        )
        for field, value in summary:
            index[itarget][field] = value

    # Write index and summary
    index_ds = group.create_dataset('index', data=index)
    index_ds.attrs['mcbs_alpha'] = self.alpha
    index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
    index_ds.attrs['mcbs_n_sets'] = self.n_sets

    self.fluxdata = fluxdata
    self.output_h5file['avg_flux'] = avg_fluxdata
def go(self):
    """Calculate per-iteration bin flux and rate matrices and store them.

    For every iteration in ``[self.iter_range.iter_start, self.iter_range.iter_stop)``
    the segment weights and parent IDs are read through ``self.data_reader``, and the
    bin assignments, trajectory labels and labeled populations are read from
    ``self.assignments_file``. The most recent ``self.window_size`` iterations of
    these are kept in ring buffers and passed to ``estimate_rates`` together with
    three work arrays, whose contents are then written to the ``labeled_bin_fluxes``,
    ``labeled_bin_rates`` and ``bin_rates`` datasets of ``self.output_file``.
    """
    pi = self.progress.indicator
    pi.new_operation('Initializing')
    with pi:
        self.data_reader.open('r')
        nbins = self.assignments_file.attrs['nbins']
        state_labels = self.assignments_file['state_labels'][...]
        state_map = self.assignments_file['state_map'][...]
        nstates = len(state_labels)
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = stop_iter - start_iter

        # Ring buffers holding at most `window_size` iterations of input data.
        weights_ring = deque(maxlen=self.window_size)
        parent_ids_ring = deque(maxlen=self.window_size)
        bin_assignments_ring = deque(maxlen=self.window_size)
        label_assignments_ring = deque(maxlen=self.window_size)

        labeled_matrix_shape = (iter_count, nstates, nstates, nbins, nbins)
        unlabeled_matrix_shape = (iter_count, nbins, nbins)

        # Chunking (one iteration per chunk) and compression are only requested together.
        if self.do_compression:
            labeled_chunks = (1, nstates, nstates, nbins, nbins)
            unlabeled_chunks = (1, nbins, nbins)
            compression = 9
        else:
            labeled_chunks = unlabeled_chunks = compression = None

        labeled_bin_fluxes_ds = self.output_file.create_dataset(
            'labeled_bin_fluxes', shape=labeled_matrix_shape, chunks=labeled_chunks,
            compression=compression, dtype=weight_dtype)
        labeled_bin_rates_ds = self.output_file.create_dataset(
            'labeled_bin_rates', shape=labeled_matrix_shape, chunks=labeled_chunks,
            compression=compression, dtype=weight_dtype)
        unlabeled_bin_rates_ds = self.output_file.create_dataset(
            'bin_rates', shape=unlabeled_matrix_shape, chunks=unlabeled_chunks,
            compression=compression, dtype=weight_dtype)

        # Per-iteration work arrays, reused on every pass of the loop below.
        # Presumably filled in place by estimate_rates -- they are never assigned here.
        fluxes = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
        labeled_rates = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
        unlabeled_rates = numpy.empty(unlabeled_matrix_shape[1:], weight_dtype)

        for ds in (self.output_file, labeled_bin_fluxes_ds, labeled_bin_rates_ds, unlabeled_bin_rates_ds):
            h5io.stamp_iter_range(ds, start_iter, stop_iter)

        # NOTE(review): 'inital bin' is misspelled; it is left untouched because the label is
        # written into the output file -- confirm no reader depends on it before correcting.
        for ds in (labeled_bin_fluxes_ds, labeled_bin_rates_ds):
            h5io.label_axes(ds, ['iteration', 'initial state', 'final state', 'inital bin', 'final bin'])
        h5io.label_axes(unlabeled_bin_rates_ds, ['iteration', 'initial bin', 'final bin'])

        pi.new_operation('Calculating flux matrices', iter_count)
        # Calculate instantaneous rate matrices and trace trajectories.
        # `range` (not the Python-2-only `xrange`) so this also runs on Python 3.
        for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']
            parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

            # Get bin and traj. ensemble assignments from the previously-generated assignments file
            assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            bin_assignments = numpy.require(
                self.assignments_file['assignments'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                dtype=index_dtype)
            label_assignments = numpy.require(
                self.assignments_file['trajlabels'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                dtype=index_dtype)
            labeled_pops = self.assignments_file['labeled_populations'][assignment_iiter]

            # Prepare to run analysis
            weights_ring.append(weights)
            parent_ids_ring.append(parent_ids)
            bin_assignments_ring.append(bin_assignments)
            label_assignments_ring.append(label_assignments)

            # Estimate rates using bin-to-bin fluxes
            estimate_rates(nbins, state_labels,
                           weights_ring, parent_ids_ring, bin_assignments_ring, label_assignments_ring,
                           state_map, labeled_pops, self.all_lags,
                           fluxes, labeled_rates, unlabeled_rates)

            # Store bin-based kinetics data
            labeled_bin_fluxes_ds[iiter] = fluxes
            labeled_bin_rates_ds[iiter] = labeled_rates
            unlabeled_bin_rates_ds[iiter] = unlabeled_rates

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, parent_ids, bin_assignments, label_assignments, labeled_pops
            pi.progress += 1
def go(self):
    """Assign every segment of the simulation to bins and states and save the result.

    Iterations ``1`` up to (but excluding) ``self.data_reader.current_iteration`` are
    processed. The output file ``self.output_filename`` is created and receives:

    * attributes ``nbins``, ``nstates`` and ``subsampled``;
    * ``bin_labels`` and, when ``self.states`` is non-empty, ``state_map`` and
      ``state_labels``;
    * ``nsegs`` / ``npts`` (segment and point counts per iteration);
    * ``assignments`` and ``labeled_populations`` for every iteration, plus
      ``trajlabels`` and ``statelabels`` when ``self.states`` is non-empty.

    The per-iteration work is delegated to ``self.assign_iteration``.
    """
    # Sanity check: none of the dataset specs may already be bound to an HDF5 file.
    assert self.data_reader.parent_id_dsspec._h5file is None
    assert self.data_reader.weight_dsspec._h5file is None
    if hasattr(self.dssynth.dsspec, '_h5file'):
        assert self.dssynth.dsspec._h5file is None

    pi = self.progress.indicator
    pi.operation = 'Initializing'
    with pi, self.data_reader, WESTPAH5File(self.output_filename, 'w', creating_program=True) as self.output_file:
        assign = self.binning.mapper.assign

        # We always assign the entire simulation, so that no trajectory appears to start
        # in a transition region that doesn't get initialized in one.
        iter_start = 1
        iter_stop = self.data_reader.current_iteration

        h5io.stamp_iter_range(self.output_file, iter_start, iter_stop)

        nbins = self.binning.mapper.nbins
        self.output_file.attrs['nbins'] = nbins

        state_map = numpy.empty((self.binning.mapper.nbins + 1,), index_dtype)
        state_map[:] = 0  # state_id == nstates => unknown state

        # Recursive mappers produce a generator rather than a list of labels
        # so consume the entire generator into a list.
        # numpy.bytes_ replaces numpy.string_, which NumPy 2.0 removed.
        labels = [numpy.bytes_(label) for label in self.binning.mapper.labels]
        self.output_file.create_dataset('bin_labels', data=labels, compression=9)

        if self.states:
            nstates = len(self.states)
            state_map[:] = nstates  # state_id == nstates => unknown state
            state_labels = [numpy.bytes_(state['label']) for state in self.states]

            for istate, sdict in enumerate(self.states):
                assert state_labels[istate] == numpy.bytes_(sdict['label'])  # sanity check
                state_assignments = assign(sdict['coords'])
                for assignment in state_assignments:
                    state_map[assignment] = istate
            self.output_file.create_dataset('state_map', data=state_map, compression=9, shuffle=True)
            self.output_file['state_labels'] = state_labels  # + ['(unknown)']
        else:
            nstates = 0
        self.output_file.attrs['nstates'] = nstates
        # Stamp if this has been subsampled.
        self.output_file.attrs['subsampled'] = self.subsample

        iter_count = iter_stop - iter_start
        nsegs = numpy.empty((iter_count,), seg_id_dtype)
        npts = numpy.empty((iter_count,), seg_id_dtype)

        # scan for largest number of segments and largest number of points
        pi.new_operation('Scanning for segment and point counts', iter_stop - iter_start)
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            iter_group = self.data_reader.get_iter_group(n_iter)
            nsegs[iiter], npts[iiter] = iter_group['pcoord'].shape[0:2]
            pi.progress += 1
            del iter_group

        pi.new_operation('Preparing output')

        # create datasets
        self.output_file.create_dataset('nsegs', data=nsegs, shuffle=True, compression=9)
        self.output_file.create_dataset('npts', data=npts, shuffle=True, compression=9)

        max_nsegs = nsegs.max()
        max_npts = npts.max()

        assignments_shape = (iter_count, max_nsegs, max_npts)
        assignments_dtype = numpy.min_scalar_type(nbins)
        # Entries beyond an iteration's actual (nsegs, npts) keep the fill value `nbins`.
        assignments_ds = self.output_file.create_dataset(
            'assignments', dtype=assignments_dtype, shape=assignments_shape,
            compression=4, shuffle=True,
            chunks=h5io.calc_chunksize(assignments_shape, assignments_dtype),
            fillvalue=nbins)
        if self.states:
            trajlabel_dtype = numpy.min_scalar_type(nstates)
            trajlabels_ds = self.output_file.create_dataset(
                'trajlabels', dtype=trajlabel_dtype, shape=assignments_shape,
                compression=4, shuffle=True,
                chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
                fillvalue=nstates)
            statelabels_ds = self.output_file.create_dataset(
                'statelabels', dtype=trajlabel_dtype, shape=assignments_shape,
                compression=4, shuffle=True,
                chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
                fillvalue=nstates)

        pops_shape = (iter_count, nstates + 1, nbins + 1)
        pops_ds = self.output_file.create_dataset(
            'labeled_populations', dtype=weight_dtype, shape=pops_shape,
            compression=4, shuffle=True,
            chunks=h5io.calc_chunksize(pops_shape, weight_dtype))
        h5io.label_axes(pops_ds, [numpy.bytes_(i) for i in ['iteration', 'state', 'bin']])

        pi.new_operation('Assigning to bins', iter_stop - iter_start)
        last_labels = None  # mapping of seg_id to last macrostate inhabited
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            # get iteration info in this block
            if iiter == 0:
                last_labels = numpy.empty((nsegs[iiter],), index_dtype)
                last_labels[:] = nstates  # unknown state

            # Slices this iteration into n_workers groups of segments, submits them to wm,
            # splices results back together
            assignments, trajlabels, pops, statelabels = self.assign_iteration(
                n_iter, nstates, nbins, state_map, last_labels)

            # Do stuff with this iteration's results
            last_labels = trajlabels[:, -1].copy()
            assignments_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]] = assignments
            pops_ds[iiter] = pops
            if self.states:
                trajlabels_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]] = trajlabels
                statelabels_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]] = statelabels

            pi.progress += 1
            del assignments, trajlabels, pops, statelabels

        # 'statelabels' only exists when states were supplied (see its creation above);
        # stamping it unconditionally would fail when looking up the missing dataset.
        stamped = ['assignments', 'npts', 'nsegs', 'labeled_populations']
        if self.states:
            stamped.append('statelabels')
        for dsname in stamped:
            h5io.stamp_iter_range(self.output_file[dsname], iter_start, iter_stop)
def go(self):
    """Calculate per-iteration bin flux and rate matrices and store them.

    For every iteration in ``[self.iter_range.iter_start, self.iter_range.iter_stop)``
    the segment weights and parent IDs are read through ``self.data_reader``, and the
    bin assignments, trajectory labels and labeled populations are read from
    ``self.assignments_file``. The most recent ``self.window_size`` iterations of
    these are kept in ring buffers and passed to ``estimate_rates`` together with
    three work arrays, whose contents are then written to the ``labeled_bin_fluxes``,
    ``labeled_bin_rates`` and ``bin_rates`` datasets of ``self.output_file``.
    """
    pi = self.progress.indicator
    pi.new_operation('Initializing')
    with pi:
        self.data_reader.open('r')
        nbins = self.assignments_file.attrs['nbins']
        state_labels = self.assignments_file['state_labels'][...]
        state_map = self.assignments_file['state_map'][...]
        nstates = len(state_labels)
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = stop_iter - start_iter

        # Ring buffers holding at most `window_size` iterations of input data.
        weights_ring = deque(maxlen=self.window_size)
        parent_ids_ring = deque(maxlen=self.window_size)
        bin_assignments_ring = deque(maxlen=self.window_size)
        label_assignments_ring = deque(maxlen=self.window_size)

        labeled_matrix_shape = (iter_count, nstates, nstates, nbins, nbins)
        unlabeled_matrix_shape = (iter_count, nbins, nbins)

        # Chunking (one iteration per chunk) and compression are only requested together.
        if self.do_compression:
            labeled_chunks = (1, nstates, nstates, nbins, nbins)
            unlabeled_chunks = (1, nbins, nbins)
            compression = 9
        else:
            labeled_chunks = unlabeled_chunks = compression = None

        labeled_bin_fluxes_ds = self.output_file.create_dataset(
            'labeled_bin_fluxes', shape=labeled_matrix_shape, chunks=labeled_chunks,
            compression=compression, dtype=weight_dtype)
        labeled_bin_rates_ds = self.output_file.create_dataset(
            'labeled_bin_rates', shape=labeled_matrix_shape, chunks=labeled_chunks,
            compression=compression, dtype=weight_dtype)
        unlabeled_bin_rates_ds = self.output_file.create_dataset(
            'bin_rates', shape=unlabeled_matrix_shape, chunks=unlabeled_chunks,
            compression=compression, dtype=weight_dtype)

        # Per-iteration work arrays, reused on every pass of the loop below.
        # Presumably filled in place by estimate_rates -- they are never assigned here.
        fluxes = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
        labeled_rates = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
        unlabeled_rates = numpy.empty(unlabeled_matrix_shape[1:], weight_dtype)

        for ds in (self.output_file, labeled_bin_fluxes_ds, labeled_bin_rates_ds, unlabeled_bin_rates_ds):
            h5io.stamp_iter_range(ds, start_iter, stop_iter)

        # NOTE(review): 'inital bin' is misspelled; it is left untouched because the label is
        # written into the output file -- confirm no reader depends on it before correcting.
        for ds in (labeled_bin_fluxes_ds, labeled_bin_rates_ds):
            h5io.label_axes(ds, ['iteration', 'initial state', 'final state', 'inital bin', 'final bin'])
        h5io.label_axes(unlabeled_bin_rates_ds, ['iteration', 'initial bin', 'final bin'])

        pi.new_operation('Calculating flux matrices', iter_count)
        # Calculate instantaneous rate matrices and trace trajectories.
        # `range` (not the Python-2-only `xrange`) so this also runs on Python 3.
        for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']
            parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

            # Get bin and traj. ensemble assignments from the previously-generated assignments file
            assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            bin_assignments = numpy.require(
                self.assignments_file['assignments'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                dtype=index_dtype)
            label_assignments = numpy.require(
                self.assignments_file['trajlabels'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                dtype=index_dtype)
            labeled_pops = self.assignments_file['labeled_populations'][assignment_iiter]

            # Prepare to run analysis
            weights_ring.append(weights)
            parent_ids_ring.append(parent_ids)
            bin_assignments_ring.append(bin_assignments)
            label_assignments_ring.append(label_assignments)

            # Estimate rates using bin-to-bin fluxes
            estimate_rates(nbins, state_labels,
                           weights_ring, parent_ids_ring, bin_assignments_ring, label_assignments_ring,
                           state_map, labeled_pops, self.all_lags,
                           fluxes, labeled_rates, unlabeled_rates)

            # Store bin-based kinetics data
            labeled_bin_fluxes_ds[iiter] = fluxes
            labeled_bin_rates_ds[iiter] = labeled_rates
            unlabeled_bin_rates_ds[iiter] = unlabeled_rates

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, parent_ids, bin_assignments, label_assignments, labeled_pops
            pi.progress += 1
def go(self):
    """Assign every segment of the simulation to bins and save the result.

    Iterations ``1`` up to (but excluding) ``self.data_reader.current_iteration`` are
    processed. The output file ``self.output_filename`` is created and receives:

    * attributes ``nbins`` and ``nstates``;
    * ``bin_labels`` and, when ``self.states`` is non-empty, ``state_map`` and
      ``state_labels``;
    * ``nsegs`` / ``npts`` (segment and point counts per iteration);
    * ``assignments`` and ``labeled_populations`` for every iteration, plus
      ``trajlabels`` when ``self.states`` is non-empty.

    The per-iteration work is delegated to ``self.assign_iteration``.
    """
    # Sanity check: none of the dataset specs may already be bound to an HDF5 file.
    assert self.data_reader.parent_id_dsspec._h5file is None
    assert self.data_reader.weight_dsspec._h5file is None
    if hasattr(self.dssynth.dsspec, '_h5file'):
        assert self.dssynth.dsspec._h5file is None

    pi = self.progress.indicator
    pi.operation = 'Initializing'
    with pi, self.data_reader, WESTPAH5File(self.output_filename, 'w', creating_program=True) as self.output_file:
        assign = self.binning.mapper.assign

        # We always assign the entire simulation, so that no trajectory appears to start
        # in a transition region that doesn't get initialized in one.
        iter_start = 1
        iter_stop = self.data_reader.current_iteration

        h5io.stamp_iter_range(self.output_file, iter_start, iter_stop)

        nbins = self.binning.mapper.nbins
        self.output_file.attrs['nbins'] = nbins

        state_map = numpy.empty((self.binning.mapper.nbins + 1,), index_dtype)
        state_map[:] = 0  # state_id == nstates => unknown state

        # Recursive mappers produce a generator rather than a list of labels
        # so consume the entire generator into a list
        labels = list(self.binning.mapper.labels)
        self.output_file.create_dataset('bin_labels', data=labels, compression=9)

        if self.states:
            nstates = len(self.states)
            state_map[:] = nstates  # state_id == nstates => unknown state
            state_labels = [state['label'] for state in self.states]

            for istate, sdict in enumerate(self.states):
                assert state_labels[istate] == sdict['label']  # sanity check
                state_assignments = assign(sdict['coords'])
                for assignment in state_assignments:
                    state_map[assignment] = istate
            self.output_file.create_dataset('state_map', data=state_map, compression=9, shuffle=True)
            self.output_file['state_labels'] = state_labels  # + ['(unknown)']
        else:
            nstates = 0
        self.output_file.attrs['nstates'] = nstates

        iter_count = iter_stop - iter_start
        nsegs = numpy.empty((iter_count,), seg_id_dtype)
        npts = numpy.empty((iter_count,), seg_id_dtype)

        # scan for largest number of segments and largest number of points
        pi.new_operation('Scanning for segment and point counts', iter_stop - iter_start)
        # `range` (not the Python-2-only `xrange`) so this also runs on Python 3.
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            iter_group = self.data_reader.get_iter_group(n_iter)
            nsegs[iiter], npts[iiter] = iter_group['pcoord'].shape[0:2]
            pi.progress += 1
            del iter_group

        pi.new_operation('Preparing output')

        # create datasets
        self.output_file.create_dataset('nsegs', data=nsegs, shuffle=True, compression=9)
        self.output_file.create_dataset('npts', data=npts, shuffle=True, compression=9)

        max_nsegs = nsegs.max()
        max_npts = npts.max()

        assignments_shape = (iter_count, max_nsegs, max_npts)
        assignments_dtype = numpy.min_scalar_type(nbins)
        # Entries beyond an iteration's actual (nsegs, npts) keep the fill value `nbins`.
        assignments_ds = self.output_file.create_dataset(
            'assignments', dtype=assignments_dtype, shape=assignments_shape,
            compression=4, shuffle=True,
            chunks=h5io.calc_chunksize(assignments_shape, assignments_dtype),
            fillvalue=nbins)
        if self.states:
            trajlabel_dtype = numpy.min_scalar_type(nstates)
            trajlabels_ds = self.output_file.create_dataset(
                'trajlabels', dtype=trajlabel_dtype, shape=assignments_shape,
                compression=4, shuffle=True,
                chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
                fillvalue=nstates)

        pops_shape = (iter_count, nstates + 1, nbins + 1)
        pops_ds = self.output_file.create_dataset(
            'labeled_populations', dtype=weight_dtype, shape=pops_shape,
            compression=4, shuffle=True,
            chunks=h5io.calc_chunksize(pops_shape, weight_dtype))
        h5io.label_axes(pops_ds, ['iteration', 'state', 'bin'])

        pi.new_operation('Assigning to bins', iter_stop - iter_start)
        last_labels = None  # mapping of seg_id to last macrostate inhabited
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            # get iteration info in this block
            if iiter == 0:
                last_labels = numpy.empty((nsegs[iiter],), index_dtype)
                last_labels[:] = nstates  # unknown state

            # Slices this iteration into n_workers groups of segments, submits them to wm,
            # splices results back together
            assignments, trajlabels, pops = self.assign_iteration(
                n_iter, nstates, nbins, state_map, last_labels)

            # Do stuff with this iteration's results
            last_labels = trajlabels[:, -1].copy()
            assignments_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]] = assignments
            pops_ds[iiter] = pops
            if self.states:
                trajlabels_ds[iiter, 0:nsegs[iiter], 0:npts[iiter]] = trajlabels

            pi.progress += 1
            del assignments, trajlabels, pops

        for dsname in 'assignments', 'npts', 'nsegs', 'labeled_populations':
            h5io.stamp_iter_range(self.output_file[dsname], iter_start, iter_stop)
def w_postanalysis_matrix(self):
    """Compute per-iteration flux matrices and bin populations and store them sparsely.

    For every iteration in ``[self.iter_range.iter_start, self.iter_range.iter_stop)``
    the segment weights are read through ``self.data_reader`` and the bin and
    trajectory-label assignments from ``self.assignments_file``. Each (bin, label)
    pair is mapped to the combined index ``nstates * bin + label`` and handed to
    ``calc_stats`` along with a mask marking points whose label equals ``nstates``.

    Written to ``self.output_file``:

    * ``bin_populations`` -- one row per iteration;
    * ``iterations/iter_XXXXXXXX/{flux,obs,rows,cols}`` -- COO representation of the
      flux matrix and the matching transition counts;
    * attributes ``nrows``, ``ncols`` and ``npts``.
    """
    pi = self.progress.indicator
    pi.new_operation('Initializing')

    self.data_reader.open('r')
    nbins = self.assignments_file.attrs['nbins']
    state_labels = self.assignments_file['state_labels'][...]
    nstates = len(state_labels)
    start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
    iter_count = stop_iter - start_iter

    # Size of the combined (bin, label) index space.
    nfbins = nbins * nstates

    flux_shape = (iter_count, nfbins, nfbins)
    pop_shape = (iter_count, nfbins)

    h5io.stamp_iter_range(self.output_file, start_iter, stop_iter)

    bin_populations_ds = self.output_file.create_dataset('bin_populations', shape=pop_shape, dtype=weight_dtype)
    h5io.stamp_iter_range(bin_populations_ds, start_iter, stop_iter)
    h5io.label_axes(bin_populations_ds, ['iteration', 'bin'])

    flux_grp = self.output_file.create_group('iterations')
    self.output_file.attrs['nrows'] = nfbins
    self.output_file.attrs['ncols'] = nfbins

    # Per-iteration work arrays handed to calc_stats on every pass of the loop below.
    fluxes = np.empty(flux_shape[1:], weight_dtype)
    populations = np.empty(pop_shape[1:], weight_dtype)
    trans = np.empty(flux_shape[1:], np.int64)

    pi.new_operation('Calculating flux matrices', iter_count)
    # Calculate instantaneous statistics
    for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
        # Get data from the main HDF5 file
        iter_group = self.data_reader.get_iter_group(n_iter)
        seg_index = iter_group['seg_index']
        nsegs, npts = iter_group['pcoord'].shape[0:2]
        weights = seg_index['weight']

        # Get bin and traj. ensemble assignments from the previously-generated assignments
        # file; both datasets are addressed with the same iteration entry.
        assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
        segment_slice = assignment_iiter + np.s_[:nsegs, :npts]
        bin_assignments = np.require(self.assignments_file['assignments'][segment_slice],
                                     dtype=index_dtype)
        macrostate_assignments = np.require(self.assignments_file['trajlabels'][segment_slice],
                                            dtype=index_dtype)

        # Flag points whose label equals nstates.
        mask_unknown = np.zeros_like(bin_assignments, dtype=np.uint16)
        mask_unknown[macrostate_assignments == nstates] = 1

        # Transform bin_assignments to take macrostate membership into account
        bin_assignments = nstates * bin_assignments + macrostate_assignments

        # Calculate bin-to-bin fluxes, bin populations and number of obs transitions
        calc_stats(bin_assignments, weights, fluxes, populations, trans, mask_unknown,
                   self.sampling_frequency)

        # Store bin-based kinetics data
        bin_populations_ds[iiter] = populations

        # Setup sparse data structures for flux and obs
        fluxes_sp = sp.coo_matrix(fluxes)
        trans_sp = sp.coo_matrix(trans)

        assert fluxes_sp.nnz == trans_sp.nnz

        flux_iter_grp = flux_grp.create_group('iter_{:08d}'.format(n_iter))
        flux_iter_grp.create_dataset('flux', data=fluxes_sp.data, dtype=weight_dtype)
        flux_iter_grp.create_dataset('obs', data=trans_sp.data, dtype=np.int32)
        flux_iter_grp.create_dataset('rows', data=fluxes_sp.row, dtype=np.int32)
        flux_iter_grp.create_dataset('cols', data=fluxes_sp.col, dtype=np.int32)
        flux_iter_grp.attrs['nrows'] = nfbins
        flux_iter_grp.attrs['ncols'] = nfbins

        # Do a little manual clean-up to prevent memory explosion
        del iter_group, weights, bin_assignments
        del macrostate_assignments

        pi.progress += 1

    # Check and save the number of intermediate time points; this will be used to normalize the
    # flux and kinetics to tau in w_postanalysis_reweight.
    # Assignment files without a 'subsampled' attribute are treated as not subsampled
    # instead of failing on the attribute lookup.
    subsampled = bool(self.assignments_file.attrs.get('subsampled', False))
    if subsampled or self.sampling_frequency == 'iteration':
        self.output_file.attrs['npts'] = 2
    else:
        # `npts` is the point count of the last iteration processed above.
        self.output_file.attrs['npts'] = npts
def calc_store_flux_data(self):
    """Compute mean target fluxes with confidence intervals and store them.

    Fluxes for the range given by ``self.iter_range`` are obtained from
    ``extract_fluxes``. For each target, a ``target_<i>`` subgroup of the
    ``target_flux`` output group receives the per-iteration ``n_iter``, ``count``
    and ``flux`` data plus the flux autocorrelation divided by its first (zero-lag)
    element. The values returned by ``mclib.mcbs_ci_correl``
    (``avg, lb_ci, ub_ci, sterr, correl_len``) are reported through
    ``westpa.rc.pstatus``, stored per target in ``avg_flux`` of
    ``self.output_h5file``, and (all but ``sterr``) in the group's ``index`` dataset.

    Also sets ``self.output_group`` and ``self.fluxdata`` and adds one entry per
    target to ``self.target_groups``.
    """
    westpa.rc.pstatus('Calculating mean flux and confidence intervals for iterations [{},{})'
                      .format(self.iter_range.iter_start, self.iter_range.iter_stop))

    fluxdata = extract_fluxes(self.iter_range.iter_start, self.iter_range.iter_stop, self.data_reader)

    # Create a group to store data in
    output_group = h5io.create_hdf5_group(self.output_h5file, 'target_flux',
                                          replace=False, creating_program=self.prog)
    self.output_group = output_group
    output_group.attrs['version_code'] = self.output_format_version
    self.iter_range.record_data_iter_range(output_group)

    n_targets = len(fluxdata)
    index = numpy.empty((n_targets,), dtype=target_index_dtype)
    avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

    # `.items()` instead of the Python-2-only `.iteritems()` so this also runs on Python 3.
    for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
        # Create group and index entry
        index[itarget]['target_label'] = str(target_label)
        target_group = output_group.create_group('target_{}'.format(itarget))
        self.target_groups[target_label] = target_group

        # Store per-iteration values
        target_group['n_iter'] = target_fluxdata['n_iter']
        target_group['count'] = target_fluxdata['count']
        target_group['flux'] = target_fluxdata['flux']
        h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

        # Calculate flux autocorrelation: correlate the mean-subtracted series with
        # itself, keep the second half of the result, and scale by its first element.
        fluxes = target_fluxdata['flux']
        mean_flux = fluxes.mean()
        fmm = fluxes - mean_flux
        acorr = fftconvolve(fmm, fmm[::-1])
        acorr = acorr[len(acorr) // 2:]
        acorr /= acorr[0]
        acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
        h5io.label_axes(acorr_ds, ['lag'], ['tau'])

        # Calculate overall averages and CIs
        avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
            {'dataset': fluxes},
            estimator=(lambda stride, dataset: numpy.mean(dataset)),
            alpha=self.alpha,
            n_sets=self.n_sets,
            autocorrel_alpha=self.autocorrel_alpha,
            subsample=numpy.mean,
            do_correl=self.do_correl,
            mcbs_enable=self.mcbs_enable)
        avg_fluxdata[itarget] = (self.iter_range.iter_start, self.iter_range.iter_stop,
                                 avg, lb_ci, ub_ci, sterr, correl_len)

        westpa.rc.pstatus('target {!r}:'.format(target_label))
        westpa.rc.pstatus(' correlation length = {} tau'.format(correl_len))
        westpa.rc.pstatus(' mean flux and CI = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))

        index[itarget]['mean_flux'] = avg
        index[itarget]['mean_flux_ci_lb'] = lb_ci
        index[itarget]['mean_flux_ci_ub'] = ub_ci
        index[itarget]['mean_flux_correl_len'] = correl_len

    # Write index and summary
    index_ds = output_group.create_dataset('index', data=index)
    index_ds.attrs['mcbs_alpha'] = self.alpha
    index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
    index_ds.attrs['mcbs_n_sets'] = self.n_sets

    self.fluxdata = fluxdata
    self.output_h5file['avg_flux'] = avg_fluxdata