def go(self):
    pi = self.progress.indicator
    pi.new_operation('Initializing')
    with pi:
        self.data_reader.open('r')
        nstates = self.assignments_file.attrs['nstates']
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop  # h5io.get_iter_range(self.assignments_file)
        iter_count = stop_iter - start_iter

        durations_ds = self.output_file.create_dataset('durations',
                                                       shape=(iter_count, 0), maxshape=(iter_count, None),
                                                       dtype=ed_list_dtype,
                                                       chunks=(1, 15360) if self.do_compression else None,
                                                       shuffle=self.do_compression,
                                                       compression=9 if self.do_compression else None)
        durations_count_ds = self.output_file.create_dataset('duration_count',
                                                             shape=(iter_count,), dtype=numpy.int_,
                                                             shuffle=True, compression=9)
        cond_fluxes_ds = self.output_file.create_dataset('conditional_fluxes',
                                                         shape=(iter_count, nstates, nstates), dtype=weight_dtype,
                                                         chunks=(h5io.calc_chunksize((iter_count, nstates, nstates), weight_dtype)
                                                                 if self.do_compression else None),
                                                         shuffle=self.do_compression,
                                                         compression=9 if self.do_compression else None)
        total_fluxes_ds = self.output_file.create_dataset('total_fluxes',
                                                          shape=(iter_count, nstates), dtype=weight_dtype,
                                                          chunks=(h5io.calc_chunksize((iter_count, nstates), weight_dtype)
                                                                  if self.do_compression else None),
                                                          shuffle=self.do_compression,
                                                          compression=9 if self.do_compression else None)
        cond_arrival_counts_ds = self.output_file.create_dataset('conditional_arrivals',
                                                                 shape=(iter_count, nstates, nstates), dtype=numpy.uint,
                                                                 chunks=(h5io.calc_chunksize((iter_count, nstates, nstates), numpy.uint)
                                                                         if self.do_compression else None),
                                                                 shuffle=self.do_compression,
                                                                 compression=9 if self.do_compression else None)
        arrival_counts_ds = self.output_file.create_dataset('arrivals',
                                                            shape=(iter_count, nstates), dtype=numpy.uint,
                                                            chunks=(h5io.calc_chunksize((iter_count, nstates), numpy.uint)
                                                                    if self.do_compression else None),
                                                            shuffle=self.do_compression,
                                                            compression=9 if self.do_compression else None)

        # copy state labels for convenience
        self.output_file['state_labels'] = self.assignments_file['state_labels'][...]

        # Put nice labels on things
        for ds in (self.output_file, durations_count_ds, cond_fluxes_ds, total_fluxes_ds):
            h5io.stamp_iter_range(ds, start_iter, stop_iter)

        # Calculate instantaneous rate matrices and trace trajectories
        last_state = None
        pi.new_operation('Tracing trajectories', iter_count)
        for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']
            #parent_ids = seg_index['parent_id']
            parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

            # Get bin and trajectory ensemble assignments from the previously-generated assignments file
            assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            bin_assignments = numpy.require(self.assignments_file['assignments'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                                            dtype=index_dtype)
            label_assignments = numpy.require(self.assignments_file['trajlabels'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                                              dtype=index_dtype)

            # Prepare to run analysis
            cond_fluxes = numpy.zeros((nstates, nstates), weight_dtype)
            total_fluxes = numpy.zeros((nstates,), weight_dtype)
            cond_counts = numpy.zeros((nstates, nstates), numpy.uint)
            total_counts = numpy.zeros((nstates,), numpy.uint)
            durations = []

            # Estimate macrostate fluxes and calculate event durations using trajectory tracing;
            # state is opaque to the find_macrostate_transitions function
            state = _fast_transition_state_copy(iiter, nstates, parent_ids, last_state)
            find_macrostate_transitions(nstates, weights, label_assignments, 1.0 / (npts - 1), state,
                                        cond_fluxes, cond_counts, total_fluxes, total_counts, durations)
            last_state = state

            # Store trace-based kinetics data
            cond_fluxes_ds[iiter] = cond_fluxes
            total_fluxes_ds[iiter] = total_fluxes
            arrival_counts_ds[iiter] = total_counts
            cond_arrival_counts_ds[iiter] = cond_counts

            durations_count_ds[iiter] = len(durations)
            if len(durations) > 0:
                durations_ds.resize((iter_count, max(len(durations), durations_ds.shape[1])))
                durations_ds[iiter, :len(durations)] = durations

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, parent_ids, bin_assignments, label_assignments, state, cond_fluxes, total_fluxes

            pi.progress += 1
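# A minimal, hypothetical sketch of reading back the datasets written by go() above and
# averaging the conditional fluxes over iterations. The file name 'kintrace.h5' and the
# choice of state indices are assumptions for illustration only; the dataset names
# ('conditional_fluxes', 'duration_count', 'state_labels') match those created above.
import h5py
import numpy

with h5py.File('kintrace.h5', 'r') as kin:
    cond_fluxes = kin['conditional_fluxes'][...]   # shape: (iter_count, nstates, nstates)
    duration_counts = kin['duration_count'][...]   # events recorded per iteration
    state_labels = [s.decode() if isinstance(s, bytes) else s for s in kin['state_labels'][...]]

    # Mean flux from state 0 into state 1 over the analyzed iteration range
    mean_flux_01 = cond_fluxes[:, 0, 1].mean()
    print('mean flux {} -> {}: {:.6g}'.format(state_labels[0], state_labels[1], mean_flux_01))
    print('total transition events recorded:', int(duration_counts.sum()))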
def go(self):
    pi = self.progress.indicator
    pi.new_operation('Initializing')
    with pi:
        self.data_reader.open('r')
        nbins = self.assignments_file.attrs['nbins']
        state_labels = self.assignments_file['state_labels'][...]
        state_map = self.assignments_file['state_map'][...]
        nstates = len(state_labels)
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop  # h5io.get_iter_range(self.assignments_file)
        iter_count = stop_iter - start_iter

        weights_ring = deque(maxlen=self.window_size)
        parent_ids_ring = deque(maxlen=self.window_size)
        bin_assignments_ring = deque(maxlen=self.window_size)
        label_assignments_ring = deque(maxlen=self.window_size)

        labeled_matrix_shape = (iter_count, nstates, nstates, nbins, nbins)
        unlabeled_matrix_shape = (iter_count, nbins, nbins)
        labeled_matrix_chunks = (1, nstates, nstates, nbins, nbins)
        unlabeled_matrix_chunks = (1, nbins, nbins)

        labeled_bin_fluxes_ds = self.output_file.create_dataset('labeled_bin_fluxes',
                                                                shape=labeled_matrix_shape,
                                                                chunks=labeled_matrix_chunks if self.do_compression else None,
                                                                compression=9 if self.do_compression else None,
                                                                dtype=weight_dtype)
        labeled_bin_rates_ds = self.output_file.create_dataset('labeled_bin_rates',
                                                               shape=labeled_matrix_shape,
                                                               chunks=labeled_matrix_chunks if self.do_compression else None,
                                                               compression=9 if self.do_compression else None,
                                                               dtype=weight_dtype)
        unlabeled_bin_rates_ds = self.output_file.create_dataset('bin_rates',
                                                                 shape=unlabeled_matrix_shape,
                                                                 chunks=unlabeled_matrix_chunks if self.do_compression else None,
                                                                 compression=9 if self.do_compression else None,
                                                                 dtype=weight_dtype)

        fluxes = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
        labeled_rates = numpy.empty(labeled_matrix_shape[1:], weight_dtype)
        unlabeled_rates = numpy.empty(unlabeled_matrix_shape[1:], weight_dtype)

        for ds in (self.output_file, labeled_bin_fluxes_ds, labeled_bin_rates_ds, unlabeled_bin_rates_ds):
            h5io.stamp_iter_range(ds, start_iter, stop_iter)

        for ds in (labeled_bin_fluxes_ds, labeled_bin_rates_ds):
            h5io.label_axes(ds, ['iteration', 'initial state', 'final state', 'initial bin', 'final bin'])

        for ds in (unlabeled_bin_rates_ds,):
            h5io.label_axes(ds, ['iteration', 'initial bin', 'final bin'])

        pi.new_operation('Calculating flux matrices', iter_count)
        # Calculate instantaneous rate matrices and trace trajectories
        for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']
            parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

            # Get bin and trajectory ensemble assignments from the previously-generated assignments file
            assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            bin_assignments = numpy.require(self.assignments_file['assignments'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                                            dtype=index_dtype)
            label_assignments = numpy.require(self.assignments_file['trajlabels'][assignment_iiter + numpy.s_[:nsegs, :npts]],
                                              dtype=index_dtype)
            labeled_pops = self.assignments_file['labeled_populations'][assignment_iiter]

            # Prepare to run analysis
            weights_ring.append(weights)
            parent_ids_ring.append(parent_ids)
            bin_assignments_ring.append(bin_assignments)
            label_assignments_ring.append(label_assignments)

            # Estimate rates using bin-to-bin fluxes
            estimate_rates(nbins, state_labels,
                           weights_ring, parent_ids_ring, bin_assignments_ring, label_assignments_ring,
                           state_map, labeled_pops, self.all_lags,
                           fluxes, labeled_rates, unlabeled_rates)

            # Store bin-based kinetics data
            labeled_bin_fluxes_ds[iiter] = fluxes
            labeled_bin_rates_ds[iiter] = labeled_rates
            unlabeled_bin_rates_ds[iiter] = unlabeled_rates

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, parent_ids, bin_assignments, label_assignments, labeled_pops

            pi.progress += 1
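# Illustration of the sliding-window behavior behind the *_ring deques above:
# deque(maxlen=N) silently drops its oldest entry once N items have been appended,
# so estimate_rates() always sees data from at most the last `window_size`
# iterations. The values below are purely illustrative.
from collections import deque

window_size = 3
weights_ring = deque(maxlen=window_size)

for n_iter in range(1, 6):
    weights_ring.append('weights for iteration {}'.format(n_iter))
    print(list(weights_ring))
# After the fourth append the entry for iteration 1 has been evicted;
# after the fifth, iteration 2 is gone as well.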
def go(self):
    self.data_reader.open('r')
    assignments_file = h5py.File(self.assignments_filename, mode='r')
    output_file = h5io.WESTPAH5File(self.output_filename, mode='w')
    pi = self.progress.indicator
    count = self.count
    timepoint = self.timepoint

    nbins = assignments_file.attrs['nbins'] + 1
    assignments_ds = assignments_file['assignments']

    iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
    iter_count = iter_stop - iter_start
    h5io.check_iter_range_least(assignments_ds, iter_start, iter_stop)
    nsegs = assignments_file['nsegs'][h5io.get_iteration_slice(assignments_file['nsegs'], iter_start, iter_stop)]

    output_file.create_dataset('n_iter', dtype=n_iter_dtype, data=list(range(iter_start, iter_stop)))

    seg_count_ds = output_file.create_dataset('nsegs', dtype=numpy.uint, shape=(iter_count, nbins))
    matching_segs_ds = output_file.create_dataset('seg_ids',
                                                  shape=(iter_count, nbins, count),
                                                  dtype=seg_id_dtype,
                                                  chunks=h5io.calc_chunksize((iter_count, nbins, count), seg_id_dtype),
                                                  shuffle=True, compression=9)
    weights_ds = output_file.create_dataset('weights',
                                            shape=(iter_count, nbins, count),
                                            dtype=weight_dtype,
                                            chunks=h5io.calc_chunksize((iter_count, nbins, count), weight_dtype),
                                            shuffle=True, compression=9)

    what = self.what

    with pi:
        pi.new_operation('Finding matching segments', extent=iter_count)
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            assignments = numpy.require(assignments_ds[h5io.get_iteration_entry(assignments_ds, n_iter)
                                                       + numpy.index_exp[:, timepoint]],
                                        dtype=westpa.binning.index_dtype)
            all_weights = self.data_reader.get_iter_group(n_iter)['seg_index']['weight']

            # the following Cython function just executes this loop:
            #for iseg in xrange(nsegs[iiter]):
            #    segs_by_bin[iseg,assignments[iseg]] = True
            segs_by_bin = assignments_list_to_table(nsegs[iiter], nbins, assignments)

            for ibin in range(nbins):
                segs = numpy.nonzero(segs_by_bin[:, ibin])[0]
                seg_count_ds[iiter, ibin] = min(len(segs), count)

                if len(segs):
                    weights = all_weights.take(segs)

                    if what == 'lowweight':
                        indices = numpy.argsort(weights)[:count]
                    elif what == 'highweight':
                        indices = numpy.argsort(weights)[::-1][:count]
                    else:
                        assert what == 'random'
                        indices = numpy.random.permutation(len(weights))

                    matching_segs_ds[iiter, ibin, :len(segs)] = segs.take(indices)
                    weights_ds[iiter, ibin, :len(segs)] = weights.take(indices)
                    del segs, weights

            del assignments, segs_by_bin, all_weights
            pi.progress += 1
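# Toy demonstration of the selection rule used above: given per-segment weights,
# 'lowweight' takes the `count` smallest, 'highweight' the `count` largest, and
# 'random' a random permutation. The weights and count below are made up.
import numpy

weights = numpy.array([0.4, 0.05, 0.2, 0.3, 0.05])
count = 2

lowweight = numpy.argsort(weights)[:count]            # indices of the two smallest weights
highweight = numpy.argsort(weights)[::-1][:count]     # indices of the two largest weights
random_pick = numpy.random.permutation(len(weights))  # note: not truncated to `count`, matching the code above

print(lowweight, weights[lowweight])
print(highweight, weights[highweight])
print(random_pick)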
def w_postanalysis_matrix(self):
    pi = self.progress.indicator
    pi.new_operation('Initializing')

    self.data_reader.open('r')
    nbins = self.assignments_file.attrs['nbins']

    state_labels = self.assignments_file['state_labels'][...]
    state_map = self.assignments_file['state_map'][...]
    nstates = len(state_labels)

    start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop  # h5io.get_iter_range(self.assignments_file)
    iter_count = stop_iter - start_iter

    nfbins = nbins * nstates

    flux_shape = (iter_count, nfbins, nfbins)
    pop_shape = (iter_count, nfbins)

    h5io.stamp_iter_range(self.output_file, start_iter, stop_iter)

    bin_populations_ds = self.output_file.create_dataset('bin_populations', shape=pop_shape, dtype=weight_dtype)
    h5io.stamp_iter_range(bin_populations_ds, start_iter, stop_iter)
    h5io.label_axes(bin_populations_ds, ['iteration', 'bin'])

    flux_grp = self.output_file.create_group('iterations')
    self.output_file.attrs['nrows'] = nfbins
    self.output_file.attrs['ncols'] = nfbins

    fluxes = np.empty(flux_shape[1:], weight_dtype)
    populations = np.empty(pop_shape[1:], weight_dtype)
    trans = np.empty(flux_shape[1:], np.int64)

    # Check to make sure this isn't a data set with target states
    #tstates = self.data_reader.data_manager.get_target_states(0)
    #if len(tstates) > 0:
    #    raise ValueError('Postanalysis reweighting analysis does not support WE simulation run under recycling conditions')

    pi.new_operation('Calculating flux matrices', iter_count)
    # Calculate instantaneous statistics
    for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
        # Get data from the main HDF5 file
        iter_group = self.data_reader.get_iter_group(n_iter)
        seg_index = iter_group['seg_index']
        nsegs, npts = iter_group['pcoord'].shape[0:2]
        weights = seg_index['weight']

        # Get bin and trajectory ensemble assignments from the previously-generated assignments file
        assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
        bin_assignments = np.require(self.assignments_file['assignments'][assignment_iiter + np.s_[:nsegs, :npts]],
                                     dtype=index_dtype)

        mask_unknown = np.zeros_like(bin_assignments, dtype=np.uint16)

        macrostate_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
        macrostate_assignments = np.require(self.assignments_file['trajlabels'][macrostate_iiter + np.s_[:nsegs, :npts]],
                                            dtype=index_dtype)

        # Transform bin_assignments to take macrostate membership into account
        bin_assignments = nstates * bin_assignments + macrostate_assignments

        mask_indx = np.where(macrostate_assignments == nstates)
        mask_unknown[mask_indx] = 1

        # Calculate bin-to-bin fluxes, bin populations and number of obs transitions
        calc_stats(bin_assignments, weights, fluxes, populations, trans, mask_unknown, self.sampling_frequency)

        # Store bin-based kinetics data
        bin_populations_ds[iiter] = populations

        # Setup sparse data structures for flux and obs
        fluxes_sp = sp.coo_matrix(fluxes)
        trans_sp = sp.coo_matrix(trans)

        assert fluxes_sp.nnz == trans_sp.nnz

        flux_iter_grp = flux_grp.create_group('iter_{:08d}'.format(n_iter))
        flux_iter_grp.create_dataset('flux', data=fluxes_sp.data, dtype=weight_dtype)
        flux_iter_grp.create_dataset('obs', data=trans_sp.data, dtype=np.int32)
        flux_iter_grp.create_dataset('rows', data=fluxes_sp.row, dtype=np.int32)
        flux_iter_grp.create_dataset('cols', data=fluxes_sp.col, dtype=np.int32)
        flux_iter_grp.attrs['nrows'] = nfbins
        flux_iter_grp.attrs['ncols'] = nfbins

        # Do a little manual clean-up to prevent memory explosion
        del iter_group, weights, bin_assignments
        del macrostate_assignments

        pi.progress += 1

    # Check and save the number of intermediate time points; this will be used to normalize the
    # flux and kinetics to tau in w_postanalysis_reweight.
    if self.assignments_file.attrs['subsampled'] == True or self.sampling_frequency == 'iteration':
        self.output_file.attrs['npts'] = 2
    else:
        #self.output_file.attrs['npts'] = npts if self.sampling_frequency == 'timepoint' else 2
        self.output_file.attrs['npts'] = npts
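# Hypothetical sketch of reassembling one iteration's flux matrix from the sparse COO
# triplets stored above. The file name 'flux_matrices.h5' and the iteration number are
# assumptions; the group/dataset layout ('iterations/iter_XXXXXXXX' with 'flux', 'rows',
# 'cols' and nrows/ncols attributes) is the one written by w_postanalysis_matrix() above.
import h5py
import scipy.sparse as sp

with h5py.File('flux_matrices.h5', 'r') as f:
    grp = f['iterations/iter_{:08d}'.format(10)]
    nrows, ncols = grp.attrs['nrows'], grp.attrs['ncols']
    flux_matrix = sp.coo_matrix((grp['flux'][...], (grp['rows'][...], grp['cols'][...])),
                                shape=(nrows, ncols))
    # Dense labeled-bin flux matrix for this iteration
    dense = flux_matrix.toarray()
    print(dense.shape, dense.sum())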