def run(self, summary_file_name, n_trajs_per_block=None):
    """Convert a summary file into moves and run them block by block.

    Every line of the summary file is turned into a move with
    ``self.parse_summary_line``. The moves are then handed to the parent
    class's ``run`` in consecutive blocks of at most ``n_trajs_per_block``
    lines each.

    Parameters
    ----------
    summary_file_name : str
        path of the summary file to read. If ``self.summary_root_dir`` is
        None, it is set to the directory part of this path.
    n_trajs_per_block : int, optional
        number of summary lines that are parsed and passed to the parent
        ``run`` at once. Default is None, which means that all lines are
        processed as a single block.

    Raises
    ------
    ValueError
        if ``n_trajs_per_block`` is given but smaller than 1 (a
        non-positive block size could never advance through the file)
    """
    if n_trajs_per_block is not None and n_trajs_per_block < 1:
        raise ValueError(
            "n_trajs_per_block must be a positive integer, got %r"
            % (n_trajs_per_block,)
        )

    # read the whole file up front; the context manager guarantees that
    # the handle is closed even if reading fails
    with open(summary_file_name, 'r') as summary:
        lines = summary.readlines()

    if self.summary_root_dir is None:
        self.summary_root_dir = os.path.dirname(summary_file_name)

    n_steps = len(lines)
    if n_trajs_per_block is None:
        n_trajs_per_block = n_steps

    line_num = 0
    while line_num < n_steps:
        if self.report_progress is not None:
            refresh_output("Working on MC step " + str(line_num) + "\n",
                           output_stream=self.report_progress)
        # slicing clips at the end of the list, so the last block may be
        # shorter than n_trajs_per_block
        block = lines[line_num:line_num + n_trajs_per_block]
        moves = [self.parse_summary_line(line) for line in block]
        super(OneWayTPSConverter, self).run(moves)
        line_num += n_trajs_per_block
def generate_from_trajectories(
        self,
        ensembles,
        trajectories,
        preconditions=None,
        strategies=None,
        reuse_strategy='avoid',
        engine=None):
    """
    Add as many initial samples as possible to this SampleSet.

    For every requested ensemble (or group of ensembles) that
    ``self.check_ensembles`` reports as still missing, the strategies are
    tried in order until one of them produces a sample from the given
    trajectories. Found samples are appended to this SampleSet with a new
    replica ID.

    Parameters
    ----------
    ensembles : list of :class:`Ensemble` or list of list of :class:`Ensemble`
        the ensembles to be filled. If an element is itself a list, only
        one sample is generated for that group: the first ensemble in the
        list for which a sample can be found.
    trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
        the input trajectories to use
    preconditions : list of str
        steps that are applied, in the given order, to the list of
        trajectories before any sample is generated. Possible choices are

        1.  `sort-shortest` - sort by length, shortest first,
        2.  `sort-median` - start with the median length and then
            alternate between longer and shorter trajectories,
        3.  `sort-longest` - sort by length, longest first,
        4.  `reverse` - reverse the order of the list and
        5.  `mirror` - append the reversed copy of every trajectory,
            in the same order

        Default is `None`, which is treated as ``['mirror']``.
    strategies : list of str or list of tuple(str, dict)
        the strategies to try, in order. Each entry is either the name of
        a strategy or a tuple of the name and a dict of options that are
        passed as keyword arguments to the ensemble function used by that
        strategy. The option `exclude` is not passed on; it holds the
        ensemble class (or tuple of classes) that the strategy skips.
        Names accepted are `get`, `split`, `extend-complex` and
        `extend-minimal`; the `extend-*` strategies are only attempted if
        an `engine` is given. Default is `None`, which is treated as
        ``['get', 'split']``. The passed list is not modified.
    reuse_strategy : str
        forwarded to the `get` and `split` ensemble functions together
        with the list of already used trajectories. With `all` no newly
        used trajectories are recorded; for values ending in `symmetric`
        a trajectory is not recorded again if its reversed copy already
        is.
    engine : :class:`openpathsampling.engines.DynamicsEngine`
        the engine passed on to the `extend-*` strategies

    Returns
    -------
    :class:`.SampleSet`
        this SampleSet itself, extended in place by all samples that
        could be generated

    Raises
    ------
    RuntimeError
        if an unknown precondition or strategy name is given

    See Also
    --------
    list_initial_ensembles

    """
    implemented_strategies = [
        'get',              # look for existing trajectories
        'split',            # look for existing sub-trajectories
        'extend-complex',   # try to extend long sub-trajectories
        'extend-minimal'    # try to extend short sub-trajectories
    ]

    implemented_preconditions = [
        'sort-shortest',
        'sort-median',
        'sort-longest',
        'reverse',
        'mirror'
    ]

    # strategy name -> `level` handed to extend_sample_from_trajectories
    # NOTE(review): 'extend-native' is dispatched here but is not listed in
    # implemented_strategies, so it is rejected by the validation below and
    # can currently never be reached - confirm whether it should be enabled
    extend_levels = {
        'extend-complex': 'complex',
        'extend-minimal': 'minimal',
        'extend-native': 'native'
    }

    if preconditions is None:
        preconditions = ['mirror']

    # create a list of trajectories
    trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

    for pre in preconditions:
        if pre not in implemented_preconditions:
            raise RuntimeError(
                '%s is not a valid precondition strategy. Choose from %s.'
                % (pre, implemented_preconditions)
            )

        if pre == 'sort-shortest':
            trajectories = sorted(trajectories, key=len)
        elif pre == 'sort-longest':
            # longest first, hence the reversed sort order
            trajectories = sorted(trajectories, key=len, reverse=True)
        elif pre == 'sort-median':
            by_length = sorted(trajectories, key=len)
            # floor division keeps the slice index an int on Python 3
            half = len(by_length) // 2
            # interleave the upper half (ascending) with the lower half
            # (descending), i.e. move away from the median step by step
            trajectories = [
                item
                for pair in zip(by_length[half:],
                                reversed(by_length[:half]))
                for item in pair
            ]
            if len(by_length) & 1:
                # for an odd count zip drops the longest one; add it last
                trajectories.append(by_length[-1])
        elif pre == 'reverse':
            trajectories = list(reversed(trajectories))
        elif pre == 'mirror':
            trajectories = trajectories + \
                [traj.reversed for traj in trajectories]

    # if we start with an existing sample set, the trajectories that are
    # already part of it count as used. Short ones first.
    used_trajectories = sorted(
        (s.trajectory for s in self if s.trajectory in trajectories),
        key=len
    )

    # treat every single ensemble as a group with one member
    ensembles = [x if isinstance(x, list) else [x] for x in ensembles]

    # 1. look in the existing sample_set
    ensembles_to_fill, _ = self.check_ensembles(ensembles)

    # we reverse because we want to be able to remove elements
    # from the list as we discover samples. This is easier to do
    # when the list is traversed backwards since indices do not
    # change, hence we reverse the list of ensembles and then traverse it
    # in reversed order
    ensembles_to_fill = list(reversed(ensembles_to_fill))

    # 2. try strategies
    if strategies is None:
        # this is the default
        strategies = ['get', 'split']

    # build a new list of (name, options) tuples so that the list passed
    # in by the caller is left untouched
    normalized_strategies = []
    for entry in strategies:
        if isinstance(entry, str):
            entry = (entry, dict())

        if entry[0] not in implemented_strategies:
            raise RuntimeError(
                'Strategy `%s` is not known. Chose from %s.' % (
                    entry[0],
                    implemented_strategies
                )
            )
        normalized_strategies.append(entry)
    strategies = normalized_strategies

    # one character per ensemble group: `+` already present, `.` missing,
    # `?` currently being tried, a digit for the strategy that filled it
    found_samples_str = ''.join(
        '.' if ens_list in ensembles_to_fill else '+'
        for ens_list in ensembles
    )

    for str_idx, (strategy, options) in enumerate(strategies):
        # the options to be passed on; `exclude` is consumed here
        opts = {key: value for key, value in options.items()
                if key != 'exclude'}
        has_exclude = 'exclude' in options

        # iterate over a snapshot, back to front, so that deleting from
        # ensembles_to_fill does not shift the indices still to come
        for idx, ens_list in reversed(list(enumerate(ensembles_to_fill))):
            pos = ensembles.index(ens_list)
            found_samples_str = \
                found_samples_str[:pos] + '?' + found_samples_str[pos + 1:]

            refresh_output((
                '# trying strategy #%d `%s`: still missing %d samples\n'
                '%s\n'
            ) % (
                str_idx + 1,
                strategy,
                len(ensembles_to_fill),
                found_samples_str
            ), ipynb_display_only=True, print_anyway=False)

            if not isinstance(ens_list, list):
                ens_list = [ens_list]

            found = False
            for ens in ens_list:
                # exclude contains the Ensemble classes to be ignored
                if has_exclude and isinstance(ens, options['exclude']):
                    continue

                # fill only the first in ens_list that can be filled
                sample = None

                if strategy == 'get':
                    sample = ens.get_sample_from_trajectories(
                        trajectories=trajectories,
                        used_trajectories=used_trajectories,
                        reuse_strategy=reuse_strategy,
                        **opts
                    )
                elif strategy == 'split':
                    sample = ens.split_sample_from_trajectories(
                        trajectories=trajectories,
                        used_trajectories=used_trajectories,
                        reuse_strategy=reuse_strategy,
                        **opts
                    )
                elif strategy in extend_levels and engine:
                    if hasattr(ens, 'extend_sample_from_trajectories'):
                        sample = ens.extend_sample_from_trajectories(
                            trajectories=trajectories,
                            engine=engine,
                            level=extend_levels[strategy],
                            **opts
                        )

                if sample is None:
                    continue

                # we've found a sample, so add it and make sure we chose
                # a proper replica ID
                found = True

                # another way would be to look for the smallest not
                # taken id. This one is simpler
                if len(self.replicas) > 0:
                    replica_idx = max(0, max(self.replicas) + 1)
                else:
                    replica_idx = 0

                sample.replica = replica_idx

                logger.info((
                    'generating - ensemble `%s` found sample '
                    'replica %d, length %d\n') % (
                    ens.name, sample.replica, len(sample)
                ))

                self.append(sample)

                if reuse_strategy != 'all':
                    # mark the trajectory as used, unless it is already
                    # marked or, for the symmetric strategies, its
                    # reversed copy is already marked
                    used = sample.trajectory
                    symmetric = reuse_strategy.endswith('symmetric')
                    if used not in used_trajectories and not (
                            symmetric and
                            used.reversed in used_trajectories):
                        used_trajectories.append(used)

                # found a sample in this category so remove it for
                # other tries
                del ensembles_to_fill[idx]

                # do not try other ensembles in this category
                break

            # only the first digit of the strategy number fits in the
            # single character reserved for this ensemble group
            found_samples_str = \
                found_samples_str[:pos] + \
                (str(str_idx + 1)[0] if found else '.') + \
                found_samples_str[pos + 1:]

    refresh_output((
        '# finished generating: still missing %d samples\n'
        '%s\n'
    ) % (
        len(ensembles_to_fill),
        found_samples_str
    ), ipynb_display_only=True, print_anyway=False)

    return self
def initial_conditions_from_trajectories(self, trajectories,
                                         sample_set=None,
                                         strategies=None,
                                         preconditions=None,
                                         reuse_strategy='avoid-symmetric',
                                         engine=None):
    """
    Build the initial SampleSet for this scheme from given trajectories.

    The initial ensembles of this scheme are looked up with
    ``list_initial_ensembles`` and the sample set is asked to generate
    samples for them from the input trajectories. Afterwards the report
    from ``initial_conditions_report`` is displayed.

    Parameters
    ----------
    trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
        the input trajectories to use
    sample_set : :class:`.SampleSet`, optional
        if given, samples are added to this sampleset. Default is None,
        which means that a new, empty sampleset is started.
    strategies : list of str or list of tuple(str, dict)
        passed on unchanged to
        :meth:`.SampleSet.generate_from_trajectories`; selects how the
        ensemble functions are used to create new samples
    preconditions : list of str
        passed on unchanged to
        :meth:`.SampleSet.generate_from_trajectories`; a list of steps
        that modify the initial list of trajectories. Possible choices
        are `sort-shortest`, `sort-median`, `sort-longest`, `reverse`
        and `mirror`.
    reuse_strategy : str
        passed on unchanged to
        :meth:`.SampleSet.generate_from_trajectories`. With `avoid`,
        reusing the same trajectory twice is avoided; `avoid-symmetric`
        also avoids reversed copies if possible. `all` does not attempt
        to avoid already used ones. `once` strictly does not reuse a
        trajectory and `once-symmetric` also does not use reversed
        copies.
    engine : :class:`openpathsampling.engines.DynamicsEngine`
        the engine used for extending moves

    Returns
    -------
    :class:`.SampleSet`
        whatever ``generate_from_trajectories`` of the sample set
        returns: the sampleset with samples for every initial ensemble
        of this scheme that could be satisfied by the given trajectories

    See Also
    --------
    list_initial_ensembles
    check_initial_conditions
    assert_initial_conditions

    """
    # start from an empty sample set unless the caller supplied one
    target_set = paths.SampleSet([]) if sample_set is None else sample_set

    initial_ensembles = self.list_initial_ensembles()

    filled_set = target_set.generate_from_trajectories(
        initial_ensembles,
        trajectories,
        preconditions,
        strategies,
        reuse_strategy,
        engine
    )

    # show which initial conditions are satisfied by the result
    report = self.initial_conditions_report(filled_set)
    refresh_output(report, ipynb_display_only=True, print_anyway=False)

    return filled_set
def generate_from_trajectories(self,
                               ensembles,
                               trajectories,
                               preconditions=None,
                               strategies=None,
                               reuse_strategy='avoid',
                               engine=None):
    """
    Add as many initial samples as possible to this SampleSet.

    For every requested ensemble (or group of ensembles) that
    ``self.check_ensembles`` reports as still missing, the strategies are
    tried in order until one of them produces a sample from the given
    trajectories. Found samples are appended to this SampleSet with a new
    replica ID.

    Parameters
    ----------
    ensembles : list of :class:`Ensemble` or list of list of :class:`Ensemble`
        the ensembles to be filled. If an element is itself a list, only
        one sample is generated for that group: the first ensemble in the
        list for which a sample can be found.
    trajectories : list of :class:`.Trajectory` or :class:`.Trajectory`
        the input trajectories to use
    preconditions : list of str
        steps that are applied, in the given order, to the list of
        trajectories before any sample is generated. Possible choices are

        1.  `sort-shortest` - sort by length, shortest first,
        2.  `sort-median` - start with the median length and then
            alternate between longer and shorter trajectories,
        3.  `sort-longest` - sort by length, longest first,
        4.  `reverse` - reverse the order of the list and
        5.  `mirror` - append the reversed copy of every trajectory,
            in the same order

        Default is `None`, which is treated as ``['mirror']``.
    strategies : list of str or list of tuple(str, dict)
        the strategies to try, in order. Each entry is either the name of
        a strategy or a tuple of the name and a dict of options that are
        passed as keyword arguments to the ensemble function used by that
        strategy. The option `exclude` is not passed on; it holds the
        ensemble class (or tuple of classes) that the strategy skips.
        Names accepted are `get`, `split`, `extend-complex` and
        `extend-minimal`; the `extend-*` strategies are only attempted if
        an `engine` is given. Default is `None`, which is treated as
        ``['get', 'split']``. The passed list is not modified.
    reuse_strategy : str
        forwarded to the `get` and `split` ensemble functions together
        with the list of already used trajectories. With `all` no newly
        used trajectories are recorded; for values ending in `symmetric`
        a trajectory is not recorded again if its reversed copy already
        is.
    engine : :class:`openpathsampling.engines.DynamicsEngine`
        the engine passed on to the `extend-*` strategies

    Returns
    -------
    :class:`.SampleSet`
        this SampleSet itself, extended in place by all samples that
        could be generated

    Raises
    ------
    RuntimeError
        if an unknown precondition or strategy name is given

    See Also
    --------
    list_initial_ensembles

    """
    implemented_strategies = [
        'get',  # look for existing trajectories
        'split',  # look for existing sub-trajectories
        'extend-complex',  # try to extend long sub-trajectories
        'extend-minimal'  # try to extend short sub-trajectories
    ]

    implemented_preconditions = [
        'sort-shortest', 'sort-median', 'sort-longest', 'reverse', 'mirror'
    ]

    # strategy name -> `level` handed to extend_sample_from_trajectories
    # NOTE(review): 'extend-native' is dispatched here but is not listed in
    # implemented_strategies, so it is rejected by the validation below and
    # can currently never be reached - confirm whether it should be enabled
    extend_levels = {
        'extend-complex': 'complex',
        'extend-minimal': 'minimal',
        'extend-native': 'native'
    }

    if preconditions is None:
        preconditions = ['mirror']

    # create a list of trajectories
    trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

    for pre in preconditions:
        if pre not in implemented_preconditions:
            raise RuntimeError(
                '%s is not a valid precondition strategy. Choose from %s.'
                % (pre, implemented_preconditions))

        if pre == 'sort-shortest':
            trajectories = sorted(trajectories, key=len)
        elif pre == 'sort-longest':
            # longest first, hence the reversed sort order
            trajectories = sorted(trajectories, key=len, reverse=True)
        elif pre == 'sort-median':
            by_length = sorted(trajectories, key=len)
            # floor division keeps the slice index an int on Python 3
            half = len(by_length) // 2
            # interleave the upper half (ascending) with the lower half
            # (descending), i.e. move away from the median step by step
            trajectories = [
                item
                for pair in zip(by_length[half:],
                                reversed(by_length[:half]))
                for item in pair
            ]
            if len(by_length) & 1:
                # for an odd count zip drops the longest one; add it last
                trajectories.append(by_length[-1])
        elif pre == 'reverse':
            trajectories = list(reversed(trajectories))
        elif pre == 'mirror':
            trajectories = trajectories + \
                [traj.reversed for traj in trajectories]

    # if we start with an existing sample set, the trajectories that are
    # already part of it count as used. Short ones first.
    used_trajectories = sorted(
        (s.trajectory for s in self if s.trajectory in trajectories),
        key=len)

    # treat every single ensemble as a group with one member
    ensembles = [x if isinstance(x, list) else [x] for x in ensembles]

    # 1. look in the existing sample_set
    ensembles_to_fill, _ = self.check_ensembles(ensembles)

    # we reverse because we want to be able to remove elements
    # from the list as we discover samples. This is easier to do
    # when the list is traversed backwards since indices do not
    # change, hence we reverse the list of ensembles and then traverse it
    # in reversed order
    ensembles_to_fill = list(reversed(ensembles_to_fill))

    # 2. try strategies
    if strategies is None:
        # this is the default
        strategies = ['get', 'split']

    # build a new list of (name, options) tuples so that the list passed
    # in by the caller is left untouched
    normalized_strategies = []
    for entry in strategies:
        if isinstance(entry, str):
            entry = (entry, dict())

        if entry[0] not in implemented_strategies:
            raise RuntimeError(
                'Strategy `%s` is not known. Chose from %s.' %
                (entry[0], implemented_strategies))
        normalized_strategies.append(entry)
    strategies = normalized_strategies

    # one character per ensemble group: `+` already present, `.` missing,
    # `?` currently being tried, a digit for the strategy that filled it
    found_samples_str = ''.join(
        '.' if ens_list in ensembles_to_fill else '+'
        for ens_list in ensembles)

    for str_idx, (strategy, options) in enumerate(strategies):
        # the options to be passed on; `exclude` is consumed here
        opts = {
            key: value
            for key, value in options.items() if key != 'exclude'
        }
        has_exclude = 'exclude' in options

        # iterate over a snapshot, back to front, so that deleting from
        # ensembles_to_fill does not shift the indices still to come
        for idx, ens_list in reversed(list(enumerate(ensembles_to_fill))):
            pos = ensembles.index(ens_list)
            found_samples_str = \
                found_samples_str[:pos] + '?' + found_samples_str[pos + 1:]

            refresh_output(
                ('# trying strategy #%d `%s`: still missing %d samples\n'
                 '%s\n') % (str_idx + 1, strategy, len(ensembles_to_fill),
                            found_samples_str),
                ipynb_display_only=True,
                print_anyway=False)

            if not isinstance(ens_list, list):
                ens_list = [ens_list]

            found = False
            for ens in ens_list:
                # exclude contains the Ensemble classes to be ignored
                if has_exclude and isinstance(ens, options['exclude']):
                    continue

                # fill only the first in ens_list that can be filled
                sample = None

                if strategy == 'get':
                    sample = ens.get_sample_from_trajectories(
                        trajectories=trajectories,
                        used_trajectories=used_trajectories,
                        reuse_strategy=reuse_strategy,
                        **opts)
                elif strategy == 'split':
                    sample = ens.split_sample_from_trajectories(
                        trajectories=trajectories,
                        used_trajectories=used_trajectories,
                        reuse_strategy=reuse_strategy,
                        **opts)
                elif strategy in extend_levels and engine:
                    if hasattr(ens, 'extend_sample_from_trajectories'):
                        sample = ens.extend_sample_from_trajectories(
                            trajectories=trajectories,
                            engine=engine,
                            level=extend_levels[strategy],
                            **opts)

                if sample is None:
                    continue

                # we've found a sample, so add it and make sure we chose
                # a proper replica ID
                found = True

                # another way would be to look for the smallest not
                # taken id. This one is simpler
                if len(self.replicas) > 0:
                    replica_idx = max(0, max(self.replicas) + 1)
                else:
                    replica_idx = 0

                sample.replica = replica_idx

                logger.info(('generating - ensemble `%s` found sample '
                             'replica %d, length %d\n') %
                            (ens.name, sample.replica, len(sample)))

                self.append(sample)

                if reuse_strategy != 'all':
                    # mark the trajectory as used, unless it is already
                    # marked or, for the symmetric strategies, its
                    # reversed copy is already marked
                    used = sample.trajectory
                    symmetric = reuse_strategy.endswith('symmetric')
                    if used not in used_trajectories and not (
                            symmetric and
                            used.reversed in used_trajectories):
                        used_trajectories.append(used)

                # found a sample in this category so remove it for
                # other tries
                del ensembles_to_fill[idx]

                # do not try other ensembles in this category
                break

            # only the first digit of the strategy number fits in the
            # single character reserved for this ensemble group
            found_samples_str = \
                found_samples_str[:pos] + \
                (str(str_idx + 1)[0] if found else '.') + \
                found_samples_str[pos + 1:]

    refresh_output(('# finished generating: still missing %d samples\n'
                    '%s\n') % (len(ensembles_to_fill), found_samples_str),
                   ipynb_display_only=True,
                   print_anyway=False)

    return self