def rebin_current(self, parent_segments):
    '''Reconstruct walkers for the current iteration based on (presumably) new binning.
    The previous iteration's segments must be provided (as ``parent_segments``) in order
    to update endpoint types appropriately.'''

    self._prep_we()
    self._parent_map = {segment.seg_id: segment for segment in parent_segments}

    # Create new segments for the next iteration
    # We assume that everything is going to continue without being touched by recycling or WE, and
    # adjust later
    new_pcoord_array = self.system.new_pcoord_array
    n_iter = None

    for ibin, _bin in enumerate(self.final_binning):
        for segment in _bin:
            if n_iter is None:
                n_iter = segment.n_iter
            else:
                assert segment.n_iter == n_iter

            new_segment = Segment(n_iter=segment.n_iter,
                                  parent_id=segment.parent_id,
                                  weight=segment.weight,
                                  wtg_parent_ids=set(segment.wtg_parent_ids or []),
                                  pcoord=new_pcoord_array(),
                                  status=Segment.SEG_STATUS_PREPARED)
            new_segment.pcoord[0] = segment.pcoord[0]
            self.next_iter_binning[ibin].add(new_segment)

    self._run_we()
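# A hypothetical sketch of driving rebin_current (the names driver, cur_segments,
# and prev_segments are assumed for illustration, not part of the API): after
# installing a new bin mapper, the current walkers are re-assigned and then
# reconstructed under the new binning, with the previous iteration's segments
# supplied so endpoint types can be updated.
driver.assign(cur_segments)  # populates driver.final_binning
driver.rebin_current(parent_segments=prev_segments)
for segment in driver.next_iter_segments:
    print(segment.seg_id, segment.weight)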
def segment(self, init_pcoord, final_pcoord, weight=1.0):
    segment = Segment(n_iter=1,
                      seg_id=self._seg_id,
                      pcoord=self.system.new_pcoord_array(),
                      weight=weight)
    segment.pcoord[0] = init_pcoord
    segment.pcoord[1] = final_pcoord
    self._seg_id += 1
    return segment
def test_merge_by_weight(self):
    selected_counts = {0: 0, 1: 0}
    alpha = 0.01
    nrounds = 1000
    from scipy.stats import binom

    # lower and upper bounds of the 99% (1 - alpha) CI for selecting the segment with weight 1/3
    lb = binom.ppf(alpha / 2.0, nrounds, 1.0 / 3.0)
    ub = binom.ppf(1.0 - alpha / 2.0, nrounds, 1.0 / 3.0)

    system = WESTSystem()
    system.bin_mapper = RectilinearBinMapper([[0.0, 1.0]])
    system.bin_target_counts = np.array([1])
    system.pcoord_len = 2
    self.we_driver = WEDriver(system=system)
    self.system = system
    self._seg_id = 0

    segments = [
        Segment(n_iter=1, seg_id=0,
                pcoord=np.array([[0], [0.25]], dtype=np.float32),
                weight=1.0 / 3.0),
        Segment(n_iter=1, seg_id=1,
                pcoord=np.array([[0], [0.75]], dtype=np.float32),
                weight=2.0 / 3.0),
    ]

    for _iround in range(nrounds):
        for segment in segments:
            segment.endpoint_type = Segment.SEG_ENDPOINT_UNSET

        self.we_driver.new_iteration()
        self.we_driver.assign(segments)
        self.we_driver.construct_next()

        assert len(self.we_driver.next_iter_binning[0]) == 1
        newseg = self.we_driver.next_iter_binning[0].pop()

        assert segments[newseg.parent_id].endpoint_type == Segment.SEG_ENDPOINT_CONTINUES
        # ~0 == -1 and ~1 == -2, so ~parent_id indexes the other of the two segments
        assert segments[~newseg.parent_id].endpoint_type == Segment.SEG_ENDPOINT_MERGED

        selected_counts[newseg.parent_id] += 1

    print(selected_counts)
    assert lb <= selected_counts[0] <= ub, (
        'Incorrect proportion of histories selected; this is expected about '
        '{:%} of the time. Retry the test.'.format(alpha)
    )
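# Standalone illustration of the acceptance band computed above (scipy's
# binom.ppf is the binomial quantile function): with nrounds Bernoulli(1/3)
# trials and alpha = 0.01, counts outside [lb, ub] occur only about 1% of the
# time by chance, which is why a failure suggests retrying the test.
from scipy.stats import binom

alpha, nrounds, p = 0.01, 1000, 1.0 / 3.0
lb = binom.ppf(alpha / 2.0, nrounds, p)        # 0.5% quantile
ub = binom.ppf(1.0 - alpha / 2.0, nrounds, p)  # 99.5% quantile
print(lb, ub)  # approximately 295 and 372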
def test_split_with_adjust_istates(self):
    # this is a split followed by a merge, for segments which are initial states
    self.system.bin_target_counts = np.array([5, 5])
    segments = [
        self.segment(1.5, 0.5, weight=0.125),
        self.segment(1.5, 0.5, weight=0.125),
        self.segment(0.0, 1.5, weight=0.375),
        self.segment(0.0, 1.5, weight=0.375),
    ]
    self.we_driver.new_iteration()
    self.we_driver._prep_we()
    self.we_driver.used_initial_states[-1] = None
    self.we_driver.used_initial_states[-2] = None

    for ibin, _bin in enumerate(self.we_driver.next_iter_binning):
        pc = np.array([[0.5 + ibin], [0.0]])
        for iseg in range(6):
            segment = Segment(n_iter=1, seg_id=None, weight=1.0 / 12.0,
                              parent_id=-(ibin + 1), pcoord=pc)
            _bin.add(segment)

    for ibin in range(len(self.we_driver.next_iter_binning)):
        # This will raise KeyError if initial state tracking is done improperly
        self.we_driver._adjust_count(ibin)

    assert len(self.we_driver.next_iter_binning[0]) == 5
    assert len(self.we_driver.next_iter_binning[1]) == 5
def construct_next(self):
    '''Construct walkers for the next iteration, by running weighted ensemble recycling
    and bin/split/merge on the segments previously assigned to bins using ``assign``.
    Enough unused initial states must be present in ``self.avail_initial_states`` for
    every recycled walker to be assigned an initial state.

    After this function completes, ``self.flux_matrix`` contains a valid flux matrix for
    this iteration (including any contributions from recycling from the previous
    iteration), and ``self.next_iter_segments`` contains a list of segments ready for
    the next iteration, with appropriate values set for weight, endpoint type, parent
    walkers, and so on.
    '''

    self._prep_we()

    # Create new segments for the next iteration
    # We assume that everything is going to continue without being touched by recycling or WE, and
    # adjust later
    new_pcoord_array = self.system.new_pcoord_array
    n_iter = None

    for ibin, _bin in enumerate(self.final_binning):
        for segment in _bin:
            if n_iter is None:
                n_iter = segment.n_iter
            else:
                assert segment.n_iter == n_iter

            segment.endpoint_type = Segment.SEG_ENDPOINT_CONTINUES
            new_segment = Segment(n_iter=segment.n_iter + 1,
                                  parent_id=segment.seg_id,
                                  weight=segment.weight,
                                  wtg_parent_ids=[segment.seg_id],
                                  pcoord=new_pcoord_array(),
                                  status=Segment.SEG_STATUS_PREPARED)
            new_segment.pcoord[0] = segment.pcoord[-1]
            self.next_iter_binning[ibin].add(new_segment)

            # Store a link to the parent segment, so we can update its endpoint status
            # as needed, based on its ID
            self._parent_map[segment.seg_id] = segment

    self._run_we()

    log.debug('used initial states: {!r}'.format(self.used_initial_states))
    log.debug('available initial states: {!r}'.format(self.avail_initial_states))
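# A minimal sketch of the per-iteration driver cycle this method belongs to,
# mirroring test_merge_by_weight above; `system` and `segments` are assumed to
# be constructed as in that test.
driver = WEDriver(system=system)
driver.new_iteration()
driver.assign(segments)   # bins the current walkers into final_binning
driver.construct_next()   # recycling plus split/merge; fills next_iter_binning
for segment in driver.next_iter_segments:
    assert segment.status == Segment.SEG_STATUS_PREPARED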
def get_segments_by_id(self, n_iter, seg_ids, include_pcoords=True):
    '''Get segments from the data manager, employing caching where possible'''

    if len(seg_ids) == 0:
        return []

    seg_index = self.get_seg_index(n_iter)
    all_wtg_parent_ids = self.get_wtg_parent_array(n_iter)

    segments = []

    if include_pcoords:
        pcoords = self.get_pcoords(n_iter, seg_ids)

    for (isegid, seg_id) in enumerate(seg_ids):
        row = seg_index[seg_id]
        parents_offset = row['wtg_offset']
        n_parents = row['wtg_n_parents']
        segment = Segment(
            seg_id=seg_id,
            n_iter=n_iter,
            status=row['status'],
            endpoint_type=row['endpoint_type'],
            walltime=row['walltime'],
            cputime=row['cputime'],
            weight=row['weight'],
        )
        if include_pcoords:
            segment.pcoord = pcoords[isegid]
        parent_ids = all_wtg_parent_ids[parents_offset:parents_offset + n_parents]
        segment.wtg_parent_ids = {int(parent_id) for parent_id in parent_ids}
        segment.parent_id = int(parent_ids[0])
        segments.append(segment)

    return segments
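# Hypothetical usage sketch (the data manager instance `dm` is assumed):
# fetch three segments of iteration 10 along with their progress coordinates.
segs = dm.get_segments_by_id(10, [0, 1, 2], include_pcoords=True)
for seg in segs:
    print(seg.seg_id, seg.weight, seg.pcoord[-1])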
def _split_walker(self, segment, m, bin):
    '''Split the walker ``segment`` (in ``bin``) into ``m`` walkers'''

    bin.remove(segment)

    new_segments = []
    for _inew in range(m):
        new_segment = Segment(n_iter=segment.n_iter,  # previously incremented
                              weight=segment.weight / m,
                              parent_id=segment.parent_id,
                              wtg_parent_ids=set(segment.wtg_parent_ids),
                              pcoord=segment.pcoord.copy(),
                              status=Segment.SEG_STATUS_PREPARED)
        new_segment.pcoord[0, :] = segment.pcoord[0, :]
        new_segments.append(new_segment)

    bin.update(new_segments)

    if log.isEnabledFor(logging.DEBUG):
        log.debug('splitting {!r} into {:d}:\n    {!r}'.format(segment, m, new_segments))

    return new_segments
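# Self-contained check of the invariant _split_walker relies on: an m-way
# split gives each child weight/m, so total probability is conserved (up to
# floating-point rounding).
import numpy as np

parent_weight = 0.375
m = 3
child_weights = np.full(m, parent_weight / m)
assert np.isclose(child_weights.sum(), parent_weight)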
def update_args_env_segment(self, template_args, environ, segment):
    template_args['segment'] = segment

    environ[self.ENV_CURRENT_SEG_INITPOINT] = Segment.initpoint_type_names[segment.initpoint_type]

    if segment.initpoint_type == Segment.SEG_INITPOINT_CONTINUES:
        # Could use the actual parent object here if the work manager cared to pass that much
        # data to us (we'd need at least the subset of parents for all segments sent in the
        # call to propagate); that may make a good west.cfg option for future crazy
        # extensibility, but for now, just populate the bare minimum
        parent = Segment(n_iter=segment.n_iter - 1, seg_id=segment.parent_id)
        parent_template_args = dict(template_args)
        parent_template_args['segment'] = parent
        environ[self.ENV_PARENT_SEG_ID] = str(segment.parent_id if segment.parent_id is not None else -1)
        environ[self.ENV_PARENT_DATA_REF] = self.makepath(self.segment_ref_template, parent_template_args)
    elif segment.initpoint_type == Segment.SEG_INITPOINT_NEWTRAJ:
        # This segment is initiated from a basis state; WEST_PARENT_SEG_ID and
        # WEST_PARENT_DATA_REF are set to the basis state ID and data ref
        initial_state = self.initial_states[segment.initial_state_id]
        basis_state = self.basis_states[initial_state.basis_state_id]

        if self.ENV_BSTATE_ID not in environ:
            self.update_args_env_basis_state(template_args, environ, basis_state)
        if self.ENV_ISTATE_ID not in environ:
            self.update_args_env_initial_state(template_args, environ, initial_state)

        assert initial_state.istate_type in (InitialState.ISTATE_TYPE_BASIS,
                                             InitialState.ISTATE_TYPE_GENERATED)
        if initial_state.istate_type == InitialState.ISTATE_TYPE_BASIS:
            environ[self.ENV_PARENT_DATA_REF] = environ[self.ENV_BSTATE_DATA_REF]
        else:  # initial_state.istate_type == InitialState.ISTATE_TYPE_GENERATED
            environ[self.ENV_PARENT_DATA_REF] = environ[self.ENV_ISTATE_DATA_REF]

    environ[self.ENV_CURRENT_SEG_ID] = str(segment.seg_id if segment.seg_id is not None else -1)
    environ[self.ENV_CURRENT_SEG_DATA_REF] = self.makepath(self.segment_ref_template, template_args)

    return template_args, environ
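# Illustration of the "bare minimum parent" idea above: an object carrying
# only n_iter and seg_id suffices to expand a path template. The template
# string and the SimpleNamespace stand-in are hypothetical, for demonstration
# only; the real code builds a Segment and calls self.makepath().
from types import SimpleNamespace

segment_ref_template = 'traj_segs/{segment.n_iter:06d}/{segment.seg_id:06d}'
parent = SimpleNamespace(n_iter=4, seg_id=17)
print(segment_ref_template.format(segment=parent))  # traj_segs/000004/000017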
@classmethod
def from_data_manager(cls, n_iter, seg_id, data_manager=None):
    '''Construct and return a trajectory trace whose last segment is identified by
    ``seg_id`` in the iteration number ``n_iter``.'''

    data_manager = data_manager or westpa.rc.get_data_manager()

    # These values are used later on
    endpoint_type = None
    pcoord_dtype = None
    pcoord_pt_shape = None

    seginfo = []
    parent_id = seg_id

    while n_iter > 0 and parent_id >= 0:
        seg_id = parent_id
        iter_group = data_manager.get_iter_group(n_iter)
        pcoord_ds = iter_group['pcoord']
        seg_index = iter_group['seg_index']
        n_segs = pcoord_ds.shape[0]
        pcoord_len = pcoord_ds.shape[1]

        assert seg_id < n_segs

        indexrow = seg_index[seg_id]
        final_pcoord = pcoord_ds[seg_id, pcoord_len - 1]
        weight = indexrow['weight']
        cputime = indexrow['cputime']
        walltime = indexrow['walltime']

        try:
            parent_id = int(indexrow['parent_id'])
        except IndexError:
            # old HDF5 version
            parent_id = int(iter_group['parents'][indexrow['parents_offset']])

        if endpoint_type is None:
            endpoint_type = indexrow['endpoint_type']
            pcoord_pt_shape = pcoord_ds.shape[2:]
            pcoord_dtype = pcoord_ds.dtype

        seginfo.append((n_iter, seg_id, weight, walltime, cputime, final_pcoord))

        del iter_group, pcoord_ds, seg_index
        n_iter -= 1

    # The loop terminates with parent_id set to the identifier of the initial state,
    # seg_id set to the identifier of the first segment in the trajectory, and
    # n_iter set to one less than the iteration of the first segment
    first_iter = n_iter + 1
    first_seg_id = seg_id
    first_parent_id = parent_id

    # Initial segment (for fetching initial state)
    first_segment = Segment(n_iter=first_iter, seg_id=first_seg_id, parent_id=first_parent_id)

    seginfo.reverse()

    summary_dtype = np.dtype([
        ('n_iter', n_iter_dtype),
        ('seg_id', seg_id_dtype),
        ('weight', weight_dtype),
        ('walltime', utime_dtype),
        ('cputime', utime_dtype),
        ('final_pcoord', pcoord_dtype, pcoord_pt_shape),
    ])

    summary = np.array(seginfo, dtype=summary_dtype)

    try:
        initial_state = data_manager.get_segment_initial_states([first_segment], first_iter)[0]
    except KeyError:
        # old HDF5 version
        assert parent_id < 0
        istate_pcoord = data_manager.get_iter_group(first_iter)['pcoord'][first_seg_id, 0]
        istate_id = -(first_parent_id + 1)
        basis_state = None
        initial_state = InitialState(istate_id, None, iter_created=0, pcoord=istate_pcoord)
    else:
        basis_state = data_manager.get_basis_states(first_iter)[initial_state.basis_state_id]

    return cls(summary, endpoint_type, basis_state, initial_state, data_manager)
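# Hypothetical usage, assuming the enclosing class is named Trace (the class
# definition is not shown in this excerpt): walk back from segment 5 of
# iteration 20 through its parents to the initial state that started its history.
trace = Trace.from_data_manager(n_iter=20, seg_id=5)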
def entry_point():
    parser = argparse.ArgumentParser('w_fork', description='''\
Prepare a new weighted ensemble simulation from an existing one at a particular point.
A new HDF5 file is generated. In the case of executable propagation, it is the user's
responsibility to prepare the new simulation directory appropriately, particularly
making the old simulation's restart data from the appropriate iteration available as
the new simulation's initial state data; a mapping of old simulation segments to new
simulation initial states is created, both in the new HDF5 file and as a flat text
file, to aid in this. Target states and basis states for the new simulation are taken
from those in the original simulation.
''')

    westpa.rc.add_args(parser)
    parser.add_argument('-i', '--input', dest='input_h5file',
                        help='''Create simulation from the given INPUT_H5FILE (default: read
                        from configuration file).''')
    parser.add_argument('-I', '--iteration', dest='n_iter', type=int,
                        help='''Take initial distribution for new simulation from iteration
                        N_ITER (default: last complete iteration).''')
    parser.add_argument('-o', '--output', dest='output_h5file', default='forked.h5',
                        help='''Save new simulation HDF5 file as OUTPUT (default: %(default)s).''')
    parser.add_argument('--istate-map', default='istate_map.txt',
                        help='''Write text file describing mapping of existing segments to new
                        initial states in ISTATE_MAP (default: %(default)s).''')
    parser.add_argument('--no-headers', action='store_true',
                        help='''Do not write header to ISTATE_MAP''')
    args = parser.parse_args()
    westpa.rc.process_args(args)

    # Open old HDF5 file
    dm_old = westpa.rc.new_data_manager()
    if args.input_h5file:
        dm_old.we_h5filename = args.input_h5file
    dm_old.open_backing(mode='r')

    # Get iteration if necessary
    n_iter = args.n_iter or dm_old.current_iteration - 1

    # Create and open new HDF5 file
    dm_new = westpa.rc.new_data_manager()
    dm_new.we_h5filename = args.output_h5file
    dm_new.prepare_backing()
    dm_new.open_backing()

    # Copy target states
    target_states = dm_old.get_target_states(n_iter)
    dm_new.save_target_states(target_states, n_iter)

    # Copy basis states
    basis_states = dm_old.get_basis_states(n_iter)
    dm_new.create_ibstate_group(basis_states, n_iter=1)

    # Transform old segments into initial states and new segments.
    # We produce one initial state and one corresponding new segment for each old
    # segment. Further adjustment can be accomplished by using w_binning.
    old_iter_group = dm_old.get_iter_group(n_iter)
    old_index = old_iter_group['seg_index'][...]
    old_pcoord_ds = old_iter_group['pcoord']
    n_segments = old_pcoord_ds.shape[0]
    pcoord_len = old_pcoord_ds.shape[1]
    pcoord_ndim = old_pcoord_ds.shape[2]
    old_final_pcoords = old_pcoord_ds[:, pcoord_len - 1, :]

    istates = dm_new.create_initial_states(n_segments, n_iter=1)
    segments = []
    state_map_dtype = np.dtype([('old_n_iter', n_iter_dtype),
                                ('old_seg_id', seg_id_dtype),
                                ('new_istate_id', seg_id_dtype)])
    state_map = np.empty((n_segments,), dtype=state_map_dtype)
    state_map['old_n_iter'] = n_iter

    for (iseg, (index_row, pcoord)) in enumerate(zip(old_index, old_final_pcoords)):
        istate = istates[iseg]
        istate.iter_created = 0
        istate.iter_used = 1
        istate.istate_type = InitialState.ISTATE_TYPE_RESTART
        istate.istate_status = InitialState.ISTATE_STATUS_PREPARED
        istate.pcoord = pcoord

        segment = Segment(n_iter=1, seg_id=iseg,
                          weight=index_row['weight'],
                          parent_id=-(istate.state_id + 1),
                          wtg_parent_ids=[-(istate.state_id + 1)],
                          status=Segment.SEG_STATUS_PREPARED)
        segment.pcoord = np.zeros((pcoord_len, pcoord_ndim), dtype=pcoord.dtype)
        segment.pcoord[0] = pcoord
        segments.append(segment)

        state_map[iseg]['old_seg_id'] = iseg
        state_map[iseg]['new_istate_id'] = istate.state_id

    dm_new.update_initial_states(istates, n_iter=0)
    dm_new.prepare_iteration(n_iter=1, segments=segments)

    # Update current iteration and close both files
    dm_new.current_iteration = 1
    dm_new.close_backing()
    dm_old.close_backing()

    # Write state map
    istate_map_file = open(args.istate_map, 'wt')
    if not args.no_headers:
        istate_map_file.write('# mapping from previous segment IDs to new initial states\n')
        istate_map_file.write('# generated by w_fork\n')
        istate_map_file.write('# column 0: old simulation n_iter\n')
        istate_map_file.write('# column 1: old simulation seg_id\n')
        istate_map_file.write('# column 2: new simulation initial state ID\n')

    for row in state_map:
        istate_map_file.write('{old_n_iter:20d} {old_seg_id:20d} {new_istate_id:20d}\n'
                              .format(old_n_iter=int(row['old_n_iter']),
                                      old_seg_id=int(row['old_seg_id']),
                                      new_istate_id=int(row['new_istate_id'])))
    istate_map_file.close()
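# The state map written above is three whitespace-separated integer columns
# (header lines start with '#', which loadtxt skips), so it can be read back
# directly; a minimal sketch assuming the default output filename.
import numpy as np

state_map = np.loadtxt('istate_map.txt',
                       dtype=[('old_n_iter', int), ('old_seg_id', int),
                              ('new_istate_id', int)])
print(state_map['new_istate_id'])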
def populate_initial(self, initial_states, weights, system=None):
    '''Create walkers for a new weighted ensemble simulation.

    One segment is created for each provided initial state, then binned and
    split/merged as necessary. After this function is called, next_iter_segments
    will yield the new segments to create, used_initial_states will contain data
    about which of the provided initial states were used, and avail_initial_states
    will contain data about which initial states were unused (because their
    corresponding walkers were merged out of existence).
    '''

    # This has to be down here to avoid an import race
    from westpa.core.data_manager import weight_dtype

    EPS = numpy.finfo(weight_dtype).eps

    system = system or westpa.core.rc.get_system_driver()
    self.new_iteration(initial_states=[], target_states=[],
                       bin_mapper=system.bin_mapper,
                       bin_target_counts=system.bin_target_counts)

    # Create dummy segments
    segments = []
    for (seg_id, (initial_state, weight)) in enumerate(zip(initial_states, weights)):
        dummy_segment = Segment(n_iter=0,
                                seg_id=seg_id,
                                parent_id=-(initial_state.state_id + 1),
                                weight=weight,
                                wtg_parent_ids={-(initial_state.state_id + 1)},
                                pcoord=system.new_pcoord_array(),
                                status=Segment.SEG_STATUS_PREPARED)
        dummy_segment.pcoord[[0, -1]] = initial_state.pcoord
        segments.append(dummy_segment)

    # Adjust weights, if necessary
    tprob = sum(weights)
    if abs(1.0 - tprob) > len(weights) * EPS:
        pscale = 1.0 / tprob
        log.warning('Weights of initial segments do not sum to unity; scaling by {:g}'.format(pscale))
        for segment in segments:
            segment.weight *= pscale

    self.assign(segments, initializing=True)
    self.construct_next()

    # We now have properly-constructed initial segments, except for parent information,
    # and we need to mark initial states as used or unused
    istates_by_id = {state.state_id: state for state in initial_states}
    dummysegs_by_id = self._parent_map
    self.avail_initial_states = dict(istates_by_id)
    self.used_initial_states = {}

    for segment in self.next_iter_segments:
        segment.parent_id = dummysegs_by_id[segment.parent_id].parent_id
        segment.wtg_parent_ids = {segment.parent_id}
        assert segment.initpoint_type == Segment.SEG_INITPOINT_NEWTRAJ
        istate = istates_by_id[segment.initial_state_id]
        try:
            self.used_initial_states[istate.state_id] = self.avail_initial_states.pop(istate.state_id)
        except KeyError:
            # Shared by more than one segment, and already marked as used
            pass

    for used_istate in self.used_initial_states.values():
        used_istate.iter_used = 1
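# Standalone illustration of the renormalization guard above: initial weights
# are rescaled only when their sum deviates from unity by more than
# len(weights) machine epsilons of the weight dtype.
import numpy as np

weights = np.array([0.25, 0.25, 0.49])
EPS = np.finfo(weights.dtype).eps
tprob = weights.sum()
if abs(1.0 - tprob) > len(weights) * EPS:
    weights *= 1.0 / tprob
print(weights.sum())  # ~1.0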
def _merge_walkers(self, segments, cumul_weight, bin):
    '''Merge the given ``segments`` in ``bin``, previously sorted by weight, into one
    conglomerate segment. ``cumul_weight`` is the cumulative sum of the weights of the
    ``segments``; this may be None to calculate it here.'''

    if cumul_weight is None:
        cumul_weight = numpy.add.accumulate([segment.weight for segment in segments])

    glom = Segment(
        n_iter=segments[0].n_iter,  # assumed correct (and equal among all segments)
        weight=cumul_weight[len(segments) - 1],
        status=Segment.SEG_STATUS_PREPARED,
        pcoord=self.system.new_pcoord_array(),
    )

    # Select the history to use.
    # The following takes a random number in the interval 0 <= x < glom.weight, then
    # sees where this value falls among the (sorted) weights of the segments being merged;
    # this ensures that a walker with (e.g.) twice the weight of its brethren has twice the
    # probability of having its history selected for continuation
    iparent = numpy.digitize((random.uniform(0, glom.weight),), cumul_weight)[0]
    gparent_seg = segments[iparent]

    # Inherit history from this segment ("gparent" stands for "glom parent", as opposed
    # to historical parent)
    glom.parent_id = gparent_seg.parent_id
    glom.pcoord[0, :] = gparent_seg.pcoord[0, :]

    # Weight comes from all segments being merged, and therefore from all their
    # parent segments
    glom.wtg_parent_ids = set()
    for segment in segments:
        glom.wtg_parent_ids |= segment.wtg_parent_ids

    # Remove merged walkers from consideration before treating initial states
    bin.difference_update(segments)

    # The historical parent of gparent is continued; all others are marked as merged
    for segment in segments:
        if segment is gparent_seg:
            # we must ignore initial states here...
            if segment.parent_id >= 0:
                self._parent_map[segment.parent_id].endpoint_type = Segment.SEG_ENDPOINT_CONTINUES
        else:
            # ...and "unuse" an initial state here (recall that initial states are in 1:1
            # correspondence with the segments they initiate), except when a
            # previously-split particle is being merged
            if segment.parent_id >= 0:
                self._parent_map[segment.parent_id].endpoint_type = Segment.SEG_ENDPOINT_MERGED
            else:
                if segment.initial_state_id in {remaining.initial_state_id for remaining in bin}:
                    log.debug('initial state in use by other walker; not removing')
                else:
                    initial_state = self.used_initial_states.pop(segment.initial_state_id)
                    log.debug('freeing initial state {!r} for future use (merged)'.format(initial_state))
                    self.avail_initial_states[initial_state.state_id] = initial_state
                    initial_state.iter_used = None

    if log.isEnabledFor(logging.DEBUG):
        log.debug('merging ({:d}) {!r} into 1:\n    {!r}'.format(len(segments), segments, glom))

    bin.add(glom)
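# Self-contained demonstration of the history-selection rule used above: a
# uniform draw on [0, total weight), located within the cumulative weights via
# numpy.digitize, picks each candidate with probability proportional to its
# weight.
import random
import numpy as np

weights = [1.0 / 3.0, 2.0 / 3.0]
cumul_weight = np.add.accumulate(weights)
counts = [0, 0]
for _ in range(10000):
    iparent = np.digitize((random.uniform(0, cumul_weight[-1]),), cumul_weight)[0]
    counts[iparent] += 1
print(counts)  # roughly [3333, 6667]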
def segment(self, init_pcoord, final_pcoord, weight=1.0):
    segment = Segment(n_iter=1,
                      seg_id=1123,
                      pcoord=self.sim_manager.system.new_pcoord_array(),
                      weight=weight)
    segment.pcoord[0] = init_pcoord
    segment.pcoord[1] = final_pcoord
    segment.parent_id = 1
    return segment