def publish_progress(self, specification, result):
    """If the intermediate result is a tuple, publish it as a ProgressEvent,
    keyed by the diff_namer's name when one is set and by the specification
    hash otherwise."""
    if isinstance(result, tuple):
        if self.diff_namer is not None:
            name = self.diff_namer.get_name(specification)
        else:
            name = specification_hash(specification)
        put_in_event_queue(self.eventQueue, ProgressEvent(name, result[0], result[1]))
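# --- Illustrative sketch (not smallab API): the naming fallback used above ---
# publish_progress keys ProgressEvents by a human-readable diff name when a
# diff_namer is set, and by specification_hash otherwise. The namer below is a
# hypothetical stand-in, assuming get_name() derives a label from a key that
# varies across the batch; the real DiffNamer may behave differently.
#
# class _ToyDiffNamer:
#     def get_name(self, specification):
#         return "seed={}".format(specification.get("seed"))
#
# With this namer, a (current, total) progress tuple for {"seed": 3} would be
# published as ProgressEvent("seed=3", current, total) instead of a long hash.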
def run(self, name: str, specifications: typing.List[Specification], experiment: ExperimentBase,
        continue_from_last_run=True, propagate_exceptions=False, force_pickle=False,
        specification_runner: SimpleAbstractRunner = MultiprocessingRunner(), use_dashboard=True,
        context_type="fork", multiprocessing_lib=None, save_every_k=None) -> None:
    """
    The method called to run an experiment batch.

    :param name: The name of this experiment batch
    :param specifications: The list of specifications to run. Should be a list of dictionaries; each dictionary
        is passed to the experiment's run method
    :param experiment: The experiment object to run
    :param continue_from_last_run: If True, will not redo already completed experiments. Defaults to True
    :param propagate_exceptions: If True, exceptions won't be caught and logged as failed experiments but will
        crash the program (as usual); useful for debugging experiments
    :param force_pickle: If True, don't attempt to JSON-serialize results and default to pickling
    :param specification_runner: An instance of ``AbstractRunner`` that will be used to run the specifications
    :param use_dashboard: If True, use the terminal monitoring dashboard. If False, just stream logs to stdout
    :param context_type: The multiprocessing start method to use ("fork", "spawn" or "forkserver")
    :param multiprocessing_lib: The multiprocessing module to use; defaults to the standard library's
        ``multiprocessing``
    :param save_every_k: For generator (checkpointed) experiments, save partial results every k yields
    :return: None
    """
    if multiprocessing_lib is None:
        import multiprocessing as mp
    else:
        mp = multiprocessing_lib
    ctx = mp.get_context(context_type)
    # Substitute the default runner before configuring it with the context
    if specification_runner is None:
        specification_runner = JoblibRunner(None)
    specification_runner.set_multiprocessing_context(ctx)
    dashboard_process = None
    try:
        manager = ctx.Manager()
        eventQueue = manager.Queue(maxsize=2000)
        put_in_event_queue(eventQueue, StartExperimentEvent(name))
        # Set up the root smallab logger
        folder_loc = os.path.join("experiment_runs", name, "logs", str(datetime.datetime.now()))
        file_loc = os.path.join(folder_loc, "main.log")
        if not os.path.exists(folder_loc):
            os.makedirs(folder_loc)
        logger = logging.getLogger("smallab")
        logger.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # Can't do this with non-fork multiprocessing
        if context_type == "fork":
            fh = logging.FileHandler(file_loc)
            fh.setFormatter(formatter)
            logger.addHandler(fh)
        if not use_dashboard:
            sh = logging.StreamHandler()
            sh.setFormatter(formatter)
            logger.addHandler(sh)
        else:
            # fq = LogToEventQueue(eventQueue)
            # sh = logging.StreamHandler(fq)
            # sh.setFormatter(formatter)
            # Add to root so all logging appears in the dashboard, not just smallab:
            # logging.getLogger().addHandler(sh)
            dashboard_process = ctx.Process(target=start_dashboard, args=(eventQueue,))
            dashboard_process.start()
        experiment.set_logging_folder(folder_loc)

        self.force_pickle = force_pickle
        if not os.path.exists(get_save_directory(name)):
            os.makedirs(get_save_directory(name))

        if continue_from_last_run:
            need_to_run_specifications = self._find_uncompleted_specifications(name, specifications)
        else:
            need_to_run_specifications = specifications

        for callback in self.callbacks:
            callback.set_experiment_name(name)

        # Register each pending specification under the same name its Begin/Progress/Complete events will use
        for specification in need_to_run_specifications:
            if self.diff_namer is not None:
                put_in_event_queue(eventQueue, RegisterEvent(self.diff_namer.get_name(specification),
                                                             specification))
            else:
                put_in_event_queue(eventQueue, RegisterEvent(specification_hash(specification), specification))

        if isinstance(specification_runner, SimpleAbstractRunner):
            specification_runner.run(need_to_run_specifications,
                                     lambda specification: run_and_save(name, experiment, specification,
                                                                        propagate_exceptions, self.callbacks,
                                                                        self.force_pickle, eventQueue,
                                                                        self.diff_namer))
        elif isinstance(specification_runner, ComplexAbstractRunner):
            specification_runner.run(need_to_run_specifications, name, experiment, propagate_exceptions,
                                     self.callbacks, self.force_pickle, eventQueue)

        self._write_to_completed_json(name, specification_runner.get_completed(),
                                      specification_runner.get_failed_specifications())

        # Call batch-complete callbacks
        if specification_runner.get_exceptions():
            for callback in self.callbacks:
                callback.on_batch_failure(specification_runner.get_exceptions(),
                                          specification_runner.get_failed_specifications())

        if specification_runner.get_completed():
            for callback in self.callbacks:
                callback.on_batch_complete(specification_runner.get_completed())
    finally:
        if dashboard_process is not None:
            dashboard_process.terminate()
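# --- Usage sketch (assumptions flagged inline; not guaranteed by this module) ---
# A minimal way run() is typically invoked. The import path and the shape of
# MyExperiment are illustrative assumptions: an ExperimentBase subclass
# implements main(specification) and returns a JSON-serializable dict (or a
# generator of partial results), and `runner` is an instance of this class.
#
# from smallab.experiment_types.experiment import Experiment
#
# class MyExperiment(Experiment):
#     def main(self, specification):
#         return {"doubled": specification["lr"] * 2}  # toy computation
#
# specs = [{"lr": 0.1, "seed": s} for s in range(3)]
# runner.run("demo-batch", specs, MyExperiment(),
#            use_dashboard=False, context_type="fork")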
def run_and_save(name, experiment, specification, propagate_exceptions, callbacks, force_pickle, eventQueue,
                 diff_namer):
    """Run a single specification in its own logging context and save its result(s)."""
    experiment = deepcopy(experiment)
    if diff_namer is None:
        specification_id = specification_hash(specification)
    else:
        specification_id = diff_namer.get_name(specification)
    logger_name = "smallab.{specification_id}".format(specification_id=specification_id)
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)
    file_handler = logging.FileHandler(get_log_file(experiment, specification_id))
    # formatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    fq = LogToEventQueue(eventQueue)
    sh = logging.StreamHandler(fq)
    sh.setFormatter(formatter)
    logger.addHandler(sh)
    # TODO: need to attach the event-queue logger handler here and not at the base logger
    experiment.set_logger_name(logger_name)
    experiment.set_experiment_local_storage(get_experiment_local_storage(name))
    experiment.set_specification_local_storage(get_specification_local_storage(name, specification, diff_namer))
    put_in_event_queue(eventQueue, BeginEvent(specification_id))

    def _interior_fn():
        result = run_with_correct_handler(experiment, name, specification, eventQueue, diff_namer=diff_namer)
        if isinstance(result, types.GeneratorType):
            # Checkpointed experiments yield partial results as they go
            for cur_result in result:
                if diff_namer is not None:
                    diff_namer.extend_name(cur_result["specification"])
                save_run(name, experiment, cur_result["specification"], cur_result["result"], force_pickle,
                         diff_namer=diff_namer, extended_keys=True)
        else:
            save_run(name, experiment, specification, result, force_pickle, diff_namer=diff_namer)
        for callback in callbacks:
            callback.on_specification_complete(specification, result)

    if not propagate_exceptions:
        try:
            _interior_fn()
            put_in_event_queue(eventQueue, CompleteEvent(specification_id))
        except Exception as e:
            logger.error("Specification Failure", exc_info=True)
            put_in_event_queue(eventQueue, FailedEvent(specification_id))
            for callback in callbacks:
                callback.on_specification_failure(e, specification)
            return e
    else:
        _interior_fn()
        put_in_event_queue(eventQueue, CompleteEvent(specification_id))
    return None
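# --- Illustrative sketch: the generator contract consumed by _interior_fn above ---
# When run_with_correct_handler returns a generator, each yielded item must be
# a dict with "specification" and "result" keys; that shape is taken directly
# from the loop above. The toy generator below is hypothetical and only shows
# the contract, assuming an "epoch" key extends the base specification.
#
# def toy_checkpointed_main(specification):
#     for epoch in range(3):
#         yield {"specification": dict(specification, epoch=epoch),
#                "result": {"loss": 1.0 / (epoch + 1)}}
#
# Each partial result is saved via save_run(..., extended_keys=True), and when
# a diff_namer is present, extend_name() folds the new keys (e.g. "epoch")
# into the specification's display name.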
def publish_progress(self, specification, result):
    if isinstance(result, tuple):
        put_in_event_queue(self.eventQueue,
                           ProgressEvent(specification_hash(specification), result[0], result[1]))