def __init__(self, experiment_name):
    self.cur_calls = 10  # resets every batch_size calls to compute_time_stats
    self.batch_size = 10
    self.last_estimate = (0, 0, 0)  # This way we can return the same time estimate every batch_size frames
    self.iteration_dataset = {}  # Maps specifications to list of iteration times
    self.completion_dataset = {}  # Maps specifications to list of completion times
    self.last_update_time = dict()
    self.start_time = dict()
    self.last_progress = dict()
    self.save_file = join(get_save_directory(experiment_name), "dashboard_metadata.json")
    self.encoders = dict()  # feature name : dict("encoder" : OneHotEncoder, "values" : list of values)
    self.specification_ids_to_specification = dict()
    # self.keys_with_numeric_vals = []
    self.first_estimate = True
    # Make dir and file if it doesn't exist yet
    if not exists(get_save_directory(experiment_name)):
        Path(get_save_directory(experiment_name)).mkdir(parents=True, exist_ok=True)
    if not exists(self.save_file):
        with open(self.save_file, 'w') as f:
            json.dump({'completions': []}, f)
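# The fields above implement a small cache: an estimate is recomputed only once every
# `batch_size` calls to compute_time_stats, and `last_estimate` is returned in between. The
# sketch below illustrates that pattern only; the real compute_time_stats is not part of this
# excerpt, and the method name and body here are assumptions.
def _cached_estimate_sketch(self):
    if self.cur_calls >= self.batch_size or self.first_estimate:
        # Recompute (expensive estimation from iteration_dataset/completion_dataset would go here)
        self.cur_calls = 0
        self.first_estimate = False
        self.last_estimate = (0, 0, 0)  # placeholder value
    self.cur_calls += 1
    return self.last_estimate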
def _write_to_completed_json(self, name: typing.AnyStr, completed_specifications: typing.List[Specification],
                             failed_specifications: typing.List[Specification]):
    """
    Writes out a file which shows the completed and failed specifications

    :param name: The name of the current batch
    :param completed_specifications: Specifications which completed successfully
    :param failed_specifications: Specifications which failed
    """
    with open(os.path.join(get_save_directory(name), "completed.json"), 'w') as f:
        json.dump(completed_specifications, f)
    with open(os.path.join(get_save_directory(name), "failed.json"), 'w') as f:
        json.dump(failed_specifications, f)
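# Since the method above writes both lists with json.dump, they can be read back with json.load.
# A minimal read-back sketch; the helper name `read_batch_outcome` is hypothetical, and
# get_save_directory is the same helper used above.
import json
import os

def read_batch_outcome(name):
    # Load the completed and failed specification lists written by _write_to_completed_json
    with open(os.path.join(get_save_directory(name), "completed.json")) as f:
        completed = json.load(f)
    with open(os.path.join(get_save_directory(name), "failed.json")) as f:
        failed = json.load(f)
    return completed, failed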
def run(self, name: typing.AnyStr, specifications: typing.List[Specification], experiment: ExperimentBase,
        continue_from_last_run=True, propagate_exceptions=False, force_pickle=False,
        specification_runner: SimpleAbstractRunner = MultiprocessingRunner(), use_dashboard=True,
        context_type="fork", multiprocessing_lib=None, save_every_k=None) -> typing.NoReturn:
    """
    The method called to run an experiment

    :param propagate_exceptions: If True, exceptions won't be caught and logged as failed experiments
        but will cause the program to crash (like normal), useful for debugging experiments
    :param name: The name of this experiment batch
    :param specifications: The list of specifications to run. Should be a list of dictionaries. Each
        dictionary is passed to the experiment run method
    :param experiment: The experiment object to run
    :param continue_from_last_run: If true, will not redo already completed experiments. Defaults to true
    :param force_pickle: If true, don't attempt to json serialize results and default to pickling
    :param specification_runner: An instance of ``AbstractRunner`` that will be used to run the specifications
    :param use_dashboard: If true, use the terminal monitoring dashboard. If false, just stream logs to stdout.
    :param context_type: The multiprocessing context type passed to mp.get_context (e.g. "fork")
    :param multiprocessing_lib: The multiprocessing module to use; defaults to the standard library's multiprocessing
    :return: No return
    """
    if multiprocessing_lib is None:
        import multiprocessing as mp
    else:
        mp = multiprocessing_lib
    ctx = mp.get_context(context_type)
    # Fall back to a default runner before using it (checking after calling
    # set_multiprocessing_context would fail on None)
    if specification_runner is None:
        specification_runner = JoblibRunner(None)
    specification_runner.set_multiprocessing_context(ctx)
    dashboard_process = None
    try:
        manager = ctx.Manager()
        eventQueue = manager.Queue(maxsize=2000)
        put_in_event_queue(eventQueue, StartExperimentEvent(name))
        # Set up root smallab logger
        folder_loc = os.path.join("experiment_runs", name, "logs", str(datetime.datetime.now()))
        file_loc = os.path.join(folder_loc, "main.log")
        if not os.path.exists(folder_loc):
            os.makedirs(folder_loc)
        logger = logging.getLogger("smallab")
        logger.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # Can't do this with non-fork multiprocessing
        if context_type == "fork":
            fh = logging.FileHandler(file_loc)
            fh.setFormatter(formatter)
            logger.addHandler(fh)
        if not use_dashboard:
            sh = logging.StreamHandler()
            sh.setFormatter(formatter)
            logger.addHandler(sh)
        else:
            # fq = LogToEventQueue(eventQueue)
            # sh = logging.StreamHandler(fq)
            # sh.setFormatter(formatter)
            # Add to root so all logging appears in dashboard not just smallab.
            # logging.getLogger().addHandler(sh)
            dashboard_process = ctx.Process(target=start_dashboard, args=(eventQueue,))
            dashboard_process.start()
        experiment.set_logging_folder(folder_loc)

        self.force_pickle = force_pickle
        if not os.path.exists(get_save_directory(name)):
            os.makedirs(get_save_directory(name))

        if continue_from_last_run:
            need_to_run_specifications = self._find_uncompleted_specifications(name, specifications)
        else:
            need_to_run_specifications = specifications
        for callback in self.callbacks:
            callback.set_experiment_name(name)

        for specification in need_to_run_specifications:
            put_in_event_queue(eventQueue, RegisterEvent(specification_hash(specification), specification))

        if isinstance(specification_runner, SimpleAbstractRunner):
            specification_runner.run(need_to_run_specifications,
                                     lambda specification: run_and_save(name, experiment, specification,
                                                                        propagate_exceptions, self.callbacks,
                                                                        self.force_pickle, eventQueue))
        elif isinstance(specification_runner, ComplexAbstractRunner):
            specification_runner.run(need_to_run_specifications, name, experiment, propagate_exceptions,
                                     self.callbacks, self.force_pickle, eventQueue)

        self._write_to_completed_json(name, specification_runner.get_completed(),
                                      specification_runner.get_failed_specifications())

        # Call batch complete functions
        if specification_runner.get_exceptions() != []:
            for callback in self.callbacks:
                callback.on_batch_failure(specification_runner.get_exceptions(),
                                          specification_runner.get_failed_specifications())

        if specification_runner.get_completed() != []:
            for callback in self.callbacks:
                callback.on_batch_complete(specification_runner.get_completed())
    finally:
        if dashboard_process is not None:
            dashboard_process.terminate()
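# A minimal usage sketch for the dashboard-enabled run method above. `runner`, SimpleExperiment,
# and the seed/num_calls specification keys are taken from the README examples further down;
# treat the exact values here as illustrative, not canonical.
runner.run("random_number",
           [{"seed": 1, "num_calls": 1}],
           SimpleExperiment(),
           specification_runner=MultiprocessingRunner(),
           use_dashboard=True,
           context_type="fork")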
def run(self, name: typing.AnyStr, specifications: typing.List[Specification], experiment: BaseExperiment,
        continue_from_last_run=True, propagate_exceptions=False, force_pickle=False,
        specification_runner: AbstractRunner = JoblibRunner(None)) -> typing.NoReturn:
    """
    The method called to run an experiment

    :param propagate_exceptions: If True, exceptions won't be caught and logged as failed experiments
        but will cause the program to crash (like normal), useful for debugging experiments
    :param name: The name of this experiment batch
    :param specifications: The list of specifications to run. Should be a list of dictionaries. Each
        dictionary is passed to the experiment run method
    :param experiment: The experiment object to run
    :param continue_from_last_run: If true, will not redo already completed experiments. Defaults to true
    :param force_pickle: If true, don't attempt to json serialize results and default to pickling
    :param specification_runner: An instance of ``AbstractRunner`` that will be used to run the specifications
    :return: No return
    """
    # Set up root smallab logger
    folder_loc = os.path.join("experiment_runs", name, "logs", str(datetime.datetime.now()))
    file_loc = os.path.join(folder_loc, "main.log")
    if not os.path.exists(folder_loc):
        os.makedirs(folder_loc)
    logger = logging.getLogger("smallab")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh = logging.FileHandler(file_loc)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    sh = logging.StreamHandler()
    sh.setFormatter(formatter)
    logger.addHandler(sh)
    experiment.set_logging_folder(folder_loc)

    self.force_pickle = force_pickle
    if not os.path.exists(get_save_directory(name)):
        os.makedirs(get_save_directory(name))

    if continue_from_last_run:
        need_to_run_specifications = self._find_uncompleted_specifications(name, specifications)
    else:
        need_to_run_specifications = specifications
    for callback in self.callbacks:
        callback.set_experiment_name(name)

    specification_runner.run(need_to_run_specifications,
                             lambda specification: self.__run_and_save(name, experiment, specification,
                                                                       propagate_exceptions))
    self._write_to_completed_json(name, specification_runner.get_completed(),
                                  specification_runner.get_failed_specifications())

    # Call batch complete functions
    if specification_runner.get_exceptions() != []:
        for callback in self.callbacks:
            callback.on_batch_failure(specification_runner.get_exceptions(),
                                      specification_runner.get_failed_specifications())

    if specification_runner.get_completed() != []:
        for callback in self.callbacks:
            callback.on_batch_complete(specification_runner.get_completed())
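# Both run variants above drive user callbacks through three hooks: set_experiment_name,
# on_batch_complete, and on_batch_failure. A minimal sketch of an object implementing those
# hooks (the class name LoggingCallback is hypothetical; smallab's real callback base class is
# not shown in this excerpt):
class LoggingCallback:
    def set_experiment_name(self, name):
        # Called once before the batch starts
        self.name = name

    def on_batch_complete(self, completed_specifications):
        # Called with the specifications that finished successfully
        print(f"[{self.name}] completed {len(completed_specifications)} specifications")

    def on_batch_failure(self, exceptions, failed_specifications):
        # Called with the raised exceptions and the specifications that failed
        print(f"[{self.name}] {len(failed_specifications)} specifications failed")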
print(results["result"]["number"])

from smallab.specification_generator import SpecificationGenerator

# If you want to run a lot of experiments but not manually write out each one, use the specification generator.
# Note: This is also JSON serializable, so you could store this in a json file
generation_specification = {"seed": [1, 2, 3, 4, 5, 6, 7, 8], "num_calls": [1, 2, 3]}

# Call the generate method. Will create the cross product.
specifications = SpecificationGenerator().generate(generation_specification)
print(specifications)

runner.run("random_number_from_generator", specifications, SimpleExperiment(), continue_from_last_run=True)

# Read back our results
for root, _, files in os.walk(get_save_directory("random_number_from_generator")):
    for fname in files:
        if ".pkl" in fname:
            with open(os.path.join(root, fname), "rb") as f:
                results = dill.load(f)
                print(results["specification"]["seed"])
                print(results["result"]["number"])

# If you have an experiment you want to run on a lot of computers you can use the MultiComputerGenerator.
# You assign each computer a number from 0..number_of_computers-1 and it gives each computer
# every number_of_computers-th specification
from smallab.specification_generator import MultiComputerGenerator

all_specifications = SpecificationGenerator().from_json_file('test.json')

g1 = MultiComputerGenerator(0, 2)
g2 = MultiComputerGenerator(1, 2)
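# A sketch of how the two generators above might be used. It assumes MultiComputerGenerator
# shares SpecificationGenerator's from_json_file interface and returns every
# number_of_computers-th specification, as described in the comment above; that interface is an
# assumption, since only the constructors appear in this excerpt.
specifications_for_computer_0 = g1.from_json_file('test.json')
specifications_for_computer_1 = g2.from_json_file('test.json')

# Together the two slices should cover all_specifications exactly once
assert len(specifications_for_computer_0) + len(specifications_for_computer_1) == len(all_specifications)

# Each machine then runs only its own slice, e.g. on computer 0:
runner.run("multi_computer_batch", specifications_for_computer_0, SimpleExperiment(),
           continue_from_last_run=True)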