# Standard-library and third-party imports used by the excerpts below.
# Project-internal names (Signal, EntityBase, EntryHDF5IOMixin, XrayNotation,
# get_config_dir, Token, FutureAdapter, entrypoint, ...) are assumed to be
# imported from elsewhere in the pymontecarlo package.
import os
import re
import queue
import logging
import threading
import concurrent.futures
from collections import Counter
from operator import itemgetter

import h5py
import pandas as pd

import pymontecarlo


class Settings(EntityBase, EntryHDF5IOMixin):

    DEFAULT_FILENAME = "settings.h5"

    settings_changed = Signal()

    def __init__(self):
        # Units
        self.preferred_units = {}

        # X-ray line
        self.preferred_xray_notation = XrayNotation.IUPAC

        # Paths
        self._opendir = None
        self._savedir = None

    @classmethod
    def read(cls, filepath=None):
        if filepath is None:
            filepath = os.path.join(get_config_dir(), cls.DEFAULT_FILENAME)

        if not os.path.exists(filepath):
            return cls()

        return super().read(filepath)

    def write(self, filepath=None):
        if filepath is None:
            filepath = os.path.join(get_config_dir(), self.DEFAULT_FILENAME)

        return super().write(filepath)

    def set_preferred_unit(self, units):
        if isinstance(units, str):
            units = pymontecarlo.unit_registry.parse_units(units)

        _, base_units = pymontecarlo.unit_registry._get_base_units(units)
        self.preferred_units[base_units] = units

    def clear_preferred_units(self):
        self.preferred_units.clear()

    def to_preferred_unit(self, q, units=None):
        if not hasattr(q, "units"):
            q = pymontecarlo.unit_registry.Quantity(q, units)

        _, base_unit = pymontecarlo.unit_registry._get_base_units(q.units)

        try:
            preferred_unit = self.preferred_units[base_unit]
            return q.to(preferred_unit)
        except KeyError:
            return q.to(base_unit)

    @property
    def opendir(self):
        return self._opendir or self._savedir or os.getcwd()

    @opendir.setter
    def opendir(self, dirpath):
        self._opendir = dirpath

    @property
    def savedir(self):
        return self._savedir or self._opendir or os.getcwd()

    @savedir.setter
    def savedir(self, dirpath):
        self._savedir = dirpath

    # region HDF5

    DATASET_PREFERRED_UNITS = "preferred units"
    ATTR_PREFERRED_XRAY_NOTATION = "preferred x-ray notation"
    ATTR_OPENDIR = "opendir"
    ATTR_SAVEDIR = "savedir"

    @classmethod
    def parse_hdf5(cls, group):
        obj = cls()

        units = [str(value) for value in group[cls.DATASET_PREFERRED_UNITS].asstr()]
        for unit in units:
            obj.set_preferred_unit(unit)

        obj.preferred_xray_notation = cls._parse_hdf5(
            group, cls.ATTR_PREFERRED_XRAY_NOTATION, XrayNotation
        )
        obj.opendir = cls._parse_hdf5(group, cls.ATTR_OPENDIR, str)
        obj.savedir = cls._parse_hdf5(group, cls.ATTR_SAVEDIR, str)

        return obj

    def convert_hdf5(self, group):
        super().convert_hdf5(group)

        shape = (len(self.preferred_units),)
        dtype = h5py.special_dtype(vlen=str)
        dataset = group.create_dataset(self.DATASET_PREFERRED_UNITS, shape, dtype)
        dataset[:] = list(map(str, self.preferred_units.values()))

        self._convert_hdf5(
            group, self.ATTR_PREFERRED_XRAY_NOTATION, self.preferred_xray_notation
        )
        self._convert_hdf5(group, self.ATTR_OPENDIR, self.opendir)
        self._convert_hdf5(group, self.ATTR_SAVEDIR, self.savedir)

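# Minimal usage sketch for the Settings class above (not part of the original
# module). It assumes pymontecarlo's unit registry is configured; "nm" and "m"
# are only example units.
def _example_settings_usage():
    settings = Settings.read()          # returns default settings if no file exists yet
    settings.set_preferred_unit("nm")   # report length quantities in nanometres
    length = settings.to_preferred_unit(1.2e-7, "m")
    settings.write()                    # saved to <config dir>/settings.h5
    return length
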
class Project(EntityBase, EntryHDF5IOMixin):

    simulation_added = Signal()
    simulation_recalculated = Signal()

    def __init__(self, filepath=None):
        self.filepath = filepath
        self.simulations = []
        self.lock = threading.Lock()
        self.recalculate_required = False

    def __getstate__(self):
        with self.lock:
            return (self.filepath, self.simulations)

    def __setstate__(self, state):
        filepath, simulations = state
        self.filepath = filepath
        self.simulations = simulations
        self.recalculate_required = True

    def add_simulation(self, simulation):
        with self.lock:
            if simulation in self.simulations:
                return

            identifiers = [
                s.identifier
                for s in self.simulations
                if s.identifier.startswith(simulation.identifier)
            ]
            if identifiers:
                last = -1
                for identifier in identifiers:
                    m = re.search(r"-(\d+)$", identifier)
                    if m is not None:
                        last = max(last, int(m.group(1)))
                simulation.identifier += "-{:d}".format(last + 1)

            self.simulations.append(simulation)
            self.recalculate_required = True

        self.simulation_added.send(simulation)

    async def recalculate(self, token=None):
        with self.lock:
            if token:
                token.start()

            count = len(self.simulations)
            for i, simulation in enumerate(self.simulations):
                progress = i / count
                status = "Calculating simulation {}".format(simulation.identifier)
                if token:
                    token.update(progress, status)

                newresult = False
                for analysis in simulation.options.analyses:
                    newresult |= analysis.calculate(simulation, tuple(self.simulations))

                if newresult:
                    self.simulation_recalculated.send(simulation)

            if token:
                token.done()

            self.recalculate_required = False

    def create_options_dataframe(
        self,
        settings,
        only_different_columns=False,
        abbreviate_name=False,
        format_number=False,
    ):
        """
        Returns a :class:`pandas.DataFrame`.

        If *only_different_columns*, the data rows will only contain the
        columns that are different between the options.
        """
        list_options = [simulation.options for simulation in self.simulations]
        return create_options_dataframe(
            list_options,
            settings,
            only_different_columns,
            abbreviate_name,
            format_number,
        )

    def create_results_dataframe(
        self, settings, result_classes=None, abbreviate_name=False, format_number=False
    ):
        """
        Returns a :class:`pandas.DataFrame`.

        If *result_classes* is a list of :class:`Result`, only the columns from
        these result classes will be returned. If ``None``, the columns from
        all results will be returned.
        """
        list_results = [simulation.results for simulation in self.simulations]
        return create_results_dataframe(
            list_results, settings, result_classes, abbreviate_name, format_number
        )

    def create_dataframe(
        self,
        settings,
        only_different_columns=False,
        abbreviate_name=False,
        format_number=False,
        result_classes=None,
    ):
        """
        Returns a :class:`pandas.DataFrame`, combining the
        :class:`pandas.DataFrame` created by :meth:`.create_options_dataframe`
        and :meth:`.create_results_dataframe`.
        """
        df_options = self.create_options_dataframe(
            settings, only_different_columns, abbreviate_name, format_number
        )
        df_results = self.create_results_dataframe(
            settings, result_classes, abbreviate_name, format_number
        )
        return pd.concat([df_options, df_results], axis=1)

    def write(self, filepath=None):
        if filepath is None:
            filepath = self.filepath
        if filepath is None:
            raise RuntimeError("No file path given")
        super().write(filepath)

    @property
    def result_classes(self):
        """
        Returns all types of result.
        """
        classes = set()
        for simulation in self.simulations:
            classes.update(type(result) for result in simulation.results)
        return classes

    # region HDF5

    GROUP_SIMULATIONS = "simulations"

    @classmethod
    def parse_hdf5(cls, group):
        filepath = group.file.filename
        project = cls(filepath)

        simulations = [
            cls._parse_hdf5_object(group_simulation)
            for group_simulation in group[cls.GROUP_SIMULATIONS].values()
        ]
        with project.lock:
            project.simulations.extend(simulations)

        return project

    def convert_hdf5(self, group):
        super().convert_hdf5(group)

        group_simulations = group.create_group(self.GROUP_SIMULATIONS)
        with self.lock:
            for simulation in self.simulations:
                name = simulation.identifier
                group_simulation = group_simulations.create_group(name)
                simulation.convert_hdf5(group_simulation)

class _Runner(_Monitorable):

    STATE_QUEUED = "queued"
    STATE_RUNNING = "running"
    STATE_SIMULATED = "simulated"
    STATE_ERROR = "error"

    def __init__(self, max_workers=1):
        _Monitorable.__init__(self)

        if max_workers < 1:
            raise ValueError("Number of workers must be greater or equal to 1.")
        self._max_workers = max_workers

        self._is_started = threading.Event()

        # State
        self._options_state_lock = threading.Lock()
        self._options_state = {}

        # Queues
        self._queue_options = queue.Queue()
        self._queue_results = queue.Queue()

        # Signals
        self.options_added = Signal()
        self.options_running = Signal()
        self.options_simulated = Signal()
        self.options_error = Signal()
        self.results_saved = Signal()
        self.results_error = Signal()

        self.options_added.connect(self._on_options_added)
        self.options_running.connect(self._on_options_running)
        self.options_simulated.connect(self._on_options_simulated)
        self.options_error.connect(self._on_options_error)
        self.results_error.connect(self._on_results_error)

        # Dispatchers
        self._dispatchers_options = set()
        for _ in range(max(1, max_workers - 1)):
            dispatcher = self._create_options_dispatcher()
            dispatcher.options_running.connect(self.options_running)
            dispatcher.options_simulated.connect(self.options_simulated)
            dispatcher.options_error.connect(self.options_error)
            self._dispatchers_options.add(dispatcher)

        self._dispatchers_results = set()
        dispatcher = self._create_results_dispatcher()
        dispatcher.results_saved.connect(self.results_saved)
        dispatcher.results_error.connect(self.results_error)
        self._dispatchers_results.add(dispatcher)

        self._dispatchers = self._dispatchers_options | self._dispatchers_results

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exctype, value, tb):
        self.close()
        return False

    def _start(self):
        """
        Starts running the simulations.
        """
        if self._is_started.is_set():
            raise RuntimeError("Runner already started")

        for dispatcher in self._dispatchers:
            dispatcher.start()
            logging.debug("Start dispatcher: %s" % dispatcher)

        self._is_started.set()

    def _create_options_dispatcher(self):
        raise NotImplementedError

    def _create_results_dispatcher(self):
        raise NotImplementedError

    def start(self):
        self._start()

    def cancel(self):
        """
        Cancels all running simulations.
        """
        for dispatcher in self._dispatchers:
            dispatcher.cancel()
            logging.debug("Dispatcher cancelled: %s" % dispatcher)

    def is_alive(self):
        """
        Returns whether simulations are being executed.
        """
        for dispatcher in self._dispatchers:
            if not dispatcher.is_alive():
                return False
        return True

    def is_finished(self):
        unfinished = 0
        with self._queue_options.all_tasks_done:
            unfinished += self._queue_options.unfinished_tasks
        with self._queue_results.all_tasks_done:
            unfinished += self._queue_results.unfinished_tasks
        return unfinished == 0

    def join(self):
        """
        Blocks until all options have been simulated.
        """
        for dispatcher in self._dispatchers:
            dispatcher.raise_exception()

        self._queue_options.join()
        self._queue_results.join()

        for dispatcher in self._dispatchers:
            dispatcher.raise_exception()

    def close(self):
        """
        Waits for simulation(s) to finish and closes the runner.
        """
        self.cancel()
        for dispatcher in self._dispatchers:
            dispatcher.join()

    def put(self, options):
        """
        Puts options in the queue.

        The options are converted using the converter of this runner's program.
        A :exc:`ValueError` is raised if options with the same name were
        already added. This error is raised as options with the same name
        would lead to results being overwritten.

        :arg options: options to be added to the queue
        """
        if not options.programs:
            raise ValueError("No program associated with options")

        base_options = options  # copy.deepcopy(options)
        logging.debug("Putting %s options in queue" % base_options)

        list_options = []
        for program in base_options.programs:
            converter = program.converter_class()

            for options in converter.convert(base_options):
                options.programs.clear()
                options.programs.add(program)
                options.name = options.name + "+" + program.alias
                freeze(options)

                self._queue_options.put((base_options, options))
                list_options.append(options)
                self.options_added.fire(options)

        return list_options

    def _on_options_added(self, options):
        logging.debug("Options %s added" % options)
        with self._options_state_lock:
            self._options_state[options] = (self.STATE_QUEUED, "queued")

    def _on_options_running(self, options):
        logging.debug("Options %s running" % options)
        with self._options_state_lock:
            self._options_state[options] = (self.STATE_RUNNING, "running")

    def _on_options_simulated(self, options):
        logging.debug("Options %s simulated" % options)
        with self._options_state_lock:
            self._options_state[options] = (self.STATE_SIMULATED, "simulated")

    def _on_options_error(self, options, ex):
        logging.exception("Options %s error" % options)
        with self._options_state_lock:
            self._options_state[options] = (self.STATE_ERROR, str(ex))

    def _on_results_error(self, results, ex):
        logging.exception("Results %s error" % results)
        with self._options_state_lock:
            for container in results:
                self._options_state[container.options] = (self.STATE_ERROR, str(ex))

    def options_state(self, options):
        with self._options_state_lock:
            return self._options_state[options][0]

    def options_progress(self, options):
        with self._options_state_lock:
            state, _message = self._options_state[options]

            if state == self.STATE_SIMULATED:
                return 1.0
            elif state == self.STATE_RUNNING:
                for dispatcher in self._dispatchers_options:
                    if dispatcher.current_options != options:
                        continue
                    return dispatcher.progress
            else:
                return 0.0

    def options_status(self, options):
        with self._options_state_lock:
            state, message = self._options_state[options]

            if state == self.STATE_RUNNING:
                for dispatcher in self._dispatchers_options:
                    if dispatcher.current_options != options:
                        continue
                    return dispatcher.status
            else:
                return message

    @property
    def progress(self):
        with self._options_state_lock:
            counter = Counter(map(itemgetter(0), self._options_state.values()))
            simulated = counter[self.STATE_SIMULATED]
            error = counter[self.STATE_ERROR]
            total = len(self._options_state)
            return (simulated + error) / total

    @property
    def status(self):
        if not self._is_started.is_set():
            return "not started"
        elif self.is_cancelled():
            return "cancelled"
        elif self.is_exception_raised():
            return "error occurred"
        elif self.is_alive():
            return "running"
        else:
            return "unknown"

    @property
    def max_workers(self):
        return self._max_workers

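# Hedged usage sketch for _Runner (not part of the original module). _Runner is
# abstract (both _create_*_dispatcher factory methods raise NotImplementedError),
# so a concrete runner class and a prepared options object with at least one
# program attached are assumed here.
def _example_runner_usage(runner_class, options):
    with runner_class(max_workers=2) as runner:  # __enter__ starts the dispatchers
        runner.put(options)       # queues one converted options set per program
        runner.join()             # blocks until both queues are drained
        return runner.progress    # fraction of options simulated or errored
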
class Settings(HDF5ReaderMixin, HDF5WriterMixin):

    DEFAULT_FILENAME = 'settings.h5'

    activated_programs_changed = Signal()
    preferred_units_changed = Signal()
    preferred_xrayline_notation_changed = Signal()
    preferred_xrayline_encoding_changed = Signal()

    def __init__(self):
        # Programs
        self._activated_programs = {}  # key: identifier, value: program object
        self._available_programs = {}  # key: identifier, value: program class

        # Units
        self.preferred_units = {}

        # X-ray line
        self._preferred_xrayline_notation = 'iupac'
        self._preferred_xrayline_encoding = 'utf16'

    @classmethod
    def read(cls, filepath=None):
        if filepath is None:
            filepath = os.path.join(get_config_dir(), cls.DEFAULT_FILENAME)
        return super().read(filepath)

    def write(self, filepath=None):
        if filepath is None:
            filepath = os.path.join(get_config_dir(), self.DEFAULT_FILENAME)
        return super().write(filepath)

    def _validate(self, errors):
        # Programs
        for program in self.activated_programs:
            validator = program.create_validator()
            validator._validate_program(program, None, errors)

    def validate(self):
        errors = set()
        self._validate(errors)

        if errors:
            raise ValidationError(*errors)

    def update(self, settings):
        settings.validate()

        self._activated_programs.clear()
        self._activated_programs = settings._activated_programs.copy()

        self.preferred_units.clear()
        self.preferred_units.update(settings.preferred_units)
        self.preferred_units_changed.send()

        self.preferred_xrayline_notation = settings.preferred_xrayline_notation
        self.preferred_xrayline_encoding = settings.preferred_xrayline_encoding

    def reload(self):
        self._available_programs.clear()
        entrypoint._ENTRYPOINTS.clear()

    def get_activated_program(self, identifier):
        """
        Returns the :class:`Program` matching the specified identifier.
        """
        try:
            return self._activated_programs[identifier]
        except KeyError:
            raise ProgramNotFound('{} is not configured'.format(identifier))

    def get_available_program_class(self, identifier):
        """
        Returns the :class:`Program` class matching the specified identifier.
        """
        try:
            self.available_programs  # Initialize
            return self._available_programs[identifier]
        except KeyError:
            raise ProgramNotFound('{} is not available'.format(identifier))

    def is_program_activated(self, identifier):
        return identifier in self._activated_programs

    def is_program_available(self, identifier):
        return identifier in self._available_programs

    def activate_program(self, program):
        identifier = program.getidentifier()
        if self.is_program_activated(identifier):
            raise ValueError('{} is already activated'.format(identifier))

        self._activated_programs[identifier] = program
        self.activated_programs_changed.send()

    def deactivate_program(self, identifier):
        self._activated_programs.pop(identifier, None)
        self.activated_programs_changed.send()

    def deactivate_all_programs(self):
        self._activated_programs.clear()
        self.activated_programs_changed.send()

    def set_preferred_unit(self, units, quiet=False):
        if isinstance(units, str):
            units = pymontecarlo.unit_registry.parse_units(units)

        _, base_units = pymontecarlo.unit_registry._get_base_units(units)
        self.preferred_units[base_units] = units

        if not quiet:
            self.preferred_units_changed.send()

    def clear_preferred_units(self, quiet=False):
        self.preferred_units.clear()

        if not quiet:
            self.preferred_units_changed.send()

    def to_preferred_unit(self, q, units=None):
        if not hasattr(q, 'units'):
            q = pymontecarlo.unit_registry.Quantity(q, units)

        _, base_unit = pymontecarlo.unit_registry._get_base_units(q.units)

        try:
            preferred_unit = self.preferred_units[base_unit]
            return q.to(preferred_unit)
        except KeyError:
            return q.to(base_unit)

    @property
    def activated_programs(self):
        """
        Returns a :class:`tuple` of all activated programs.
        The items are :class:`Program` instances.
        """
        return tuple(self._activated_programs.values())

    @property
    def available_programs(self):
        """
        Returns a :class:`tuple` of all available programs, whether or not
        they are activated.
        The items are :class:`Program` classes.
        """
        # Late initialization
        if not self._available_programs:
            self._available_programs = {}

            for clasz in entrypoint.resolve_entrypoints(ENTRYPOINT_AVAILABLE_PROGRAMS):
                identifier = clasz.getidentifier()
                self._available_programs[identifier] = clasz

        return tuple(self._available_programs.values())

    @property
    def preferred_xrayline_notation(self):
        return self._preferred_xrayline_notation

    @preferred_xrayline_notation.setter
    def preferred_xrayline_notation(self, notation):
        if self._preferred_xrayline_notation == notation:
            return
        self._preferred_xrayline_notation = notation
        self.preferred_xrayline_notation_changed.send()

    @property
    def preferred_xrayline_encoding(self):
        return self._preferred_xrayline_encoding

    @preferred_xrayline_encoding.setter
    def preferred_xrayline_encoding(self, encoding):
        if self._preferred_xrayline_encoding == encoding:
            return
        self._preferred_xrayline_encoding = encoding
        self.preferred_xrayline_encoding_changed.send()

class FutureExecutor(Monitorable):

    submitted = Signal()

    def __init__(self, max_workers=1):
        self.max_workers = max_workers
        self.executor = None
        self.futures = set()
        self.failed_futures = set()
        self.failed_count = 0
        self.cancelled_count = 0
        self.submitted_count = 0
        self.done_count = 0

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exctype, value, tb):
        self.shutdown()
        return False

    def _on_done(self, future):
        if future.cancelled():
            future.token.update(1.0, 'Cancelled')
            self.cancelled_count += 1
            return

        if future.exception():
            future.token.update(1.0, 'Error')
            self.failed_futures.add(future)
            self.failed_count += 1
            return

        future.token.update(1.0, 'Done')
        self.done_count += 1
        return future.result()

    def start(self):
        if self.executor is not None:
            return
        self.executor = concurrent.futures.ThreadPoolExecutor(self.max_workers)

    def cancel(self):
        """
        Cancels all not completed futures.
        """
        for future in self.futures:
            if not future.done():
                future.cancel()

    def shutdown(self):
        if self.executor is None:
            return
        self.executor.shutdown(wait=True)
        self.futures.clear()

    def wait(self, timeout=None):
        """
        Waits forever if *timeout* is ``None``.
        Otherwise waits for *timeout* and returns ``True`` if all submissions
        were executed, ``False`` otherwise.
        """
        fs = [future.future for future in self.futures]
        _done, notdone = concurrent.futures.wait(
            fs, timeout, concurrent.futures.ALL_COMPLETED
        )
        return not notdone

    def _submit(self, target, *args, **kwargs):
        """
        Submits target function with specified arguments.

        .. note::
           The derived class should ideally create a :meth:`submit` method
           that calls this method.

        :arg target: function to execute.
            The first argument of the function should be a token, where the
            progress and status of the function can be updated::

                def target(token):
                    token.update(0.0, 'start')
                    if token.cancelled():
                        return
                    token.update(1.0, 'done')

        :return: a :class:`Future` object
        """
        if self.executor is None:
            raise RuntimeError('Executor is not started')

        token = Token()
        future = self.executor.submit(target, token, *args, **kwargs)

        future2 = FutureAdapter(future, token, args, kwargs)
        future2.add_done_callback(self._on_done)
        self.futures.add(future2)

        self.submitted_count += 1
        self.submitted.send(future2)

        return future2

    def running(self):
        """
        Returns whether the executor is running and can accept submissions.
        """
        return any(future.running() for future in self.futures)

    def done(self):
        return all(future.done() for future in self.futures)

    def cancelled(self):
        return False

    @property
    def progress(self):
        if self.submitted_count == 0:
            return 0
        return (self.done_count + self.failed_count + self.cancelled_count) / self.submitted_count

    @property
    def status(self):
        return ''

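# Hedged usage sketch for FutureExecutor (not part of the original module).
# ``_submit`` is meant to be wrapped by a ``submit`` method in a subclass, so
# the _SleepExecutor class below is purely illustrative; the target function
# receives the progress Token as its first argument.
import time

class _SleepExecutor(FutureExecutor):

    def submit(self, seconds):
        def target(token, seconds):
            token.update(0.0, 'sleeping')
            time.sleep(seconds)
            return seconds
        return self._submit(target, seconds)

def _example_executor_usage():
    with _SleepExecutor(max_workers=2) as executor:
        executor.submit(0.1)
        return executor.wait()  # True once all submissions have completed
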
class Project(HDF5ReaderMixin, HDF5WriterMixin):

    simulation_added = Signal()
    recalculated = Signal()

    def __init__(self, filepath=None):
        self.filepath = filepath
        self.simulations = []
        self.lock = threading.Lock()
        self.recalculate_required = False

    def add_simulation(self, simulation):
        with self.lock:
            if simulation in self.simulations:
                return

            identifiers = [
                s.identifier
                for s in self.simulations
                if s.identifier.startswith(simulation.identifier)
            ]
            if identifiers:
                last = -1
                for identifier in identifiers:
                    m = re.search(r'-(\d+)$', identifier)
                    if m is not None:
                        last = max(last, int(m.group(1)))
                simulation.identifier += '-{:d}'.format(last + 1)

            self.simulations.append(simulation)
            self.recalculate_required = True

        self.simulation_added.send(simulation)

    def recalculate(self, token=None):
        with self.lock:
            count = len(self.simulations)
            for i, simulation in enumerate(self.simulations):
                if token and token.cancelled():
                    break

                progress = i / count
                status = 'Calculating simulation {}'.format(simulation.identifier)
                if token:
                    token.update(progress, status)

                for analysis in simulation.options.analyses:
                    analysis.calculate(simulation, tuple(self.simulations))

            if token:
                token.update(1.0, 'Done')

            self.recalculate_required = False

        self.recalculated.send()

    def create_options_dataframe(self, only_different_columns=False):
        """
        Returns a :class:`pandas.DataFrame`.

        If *only_different_columns*, the data rows will only contain the
        columns that are different between the options.
        """
        list_options = [simulation.options for simulation in self.simulations]
        return create_options_dataframe(list_options, only_different_columns)

    def create_results_dataframe(self, result_classes=None):
        """
        Returns a :class:`pandas.DataFrame`.

        If *result_classes* is a list of :class:`Result`, only the columns
        from these result classes will be returned. If ``None``, the columns
        from all results will be returned.
        """
        list_results = [simulation.results for simulation in self.simulations]
        return create_results_dataframe(list_results, result_classes)

    def write(self, filepath=None):
        if filepath is None:
            filepath = self.filepath
        if filepath is None:
            raise RuntimeError('No file path given')
        super().write(filepath)

    @property
    def result_classes(self):
        """
        Returns all types of result.
        """
        classes = set()
        for simulation in self.simulations:
            classes.update(type(result) for result in simulation.results)
        return classes

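# Hedged usage sketch for this Project variant (not part of the original
# module). Here ``recalculate`` is synchronous and takes an optional progress
# token; ``simulations`` is assumed to be an iterable of prepared Simulation
# objects.
def _example_project_recalculate(simulations):
    project = Project()
    for simulation in simulations:
        project.add_simulation(simulation)
    project.recalculate()                       # blocking; fires ``recalculated`` when done
    return project.create_results_dataframe()  # all result classes by default
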