Example #1
0
    def testmain(self):
        """Run every specification through the dashboard-enabled "spawn" runner,
        then verify that log files were written and every expected output
        specification produced a result."""
        # Input specs: num_calls is a nested list, so each generated spec
        # carries the whole [10, 20, 30] sequence
        input_spec = {"seed": [1, 2, 3, 4, 5, 6, 7, 8], "num_calls": [[10, 20, 30]]}
        specifications = SpecificationGenerator().generate(input_spec)

        # Expected outputs: the flat cross product of seeds and num_calls values
        expected_spec = {"seed": [1, 2, 3, 4, 5, 6, 7, 8], "num_calls": [10, 20, 30]}
        output_specifications = SpecificationGenerator().generate(expected_spec)

        name = "test"
        # Run everything in parallel with the dashboard on, using "spawn" workers
        runner = ExperimentRunner()
        runner.run(name, specifications, SimpleExperiment(),
                   specification_runner=MultiprocessingRunner(),
                   use_dashboard=True, propagate_exceptions=True, context_type="spawn")

        # Every log file written for this batch must be non-empty
        log_base = os.path.join("experiment_runs", name, "logs")
        for root, dirs, files in os.walk(log_base):
            for log_name in files:
                with open(os.path.join(root, log_name), "r") as handle:
                    self.assertNotEqual([], handle.readlines())

        # Remove each specification that produced a result; nothing should remain
        for result in experiment_iterator(name):
            if result["result"] != []:
                output_specifications.remove(result["specification"])
        self.assertEqual([], output_specifications)
Example #2
0
    def testmain(self):
        """Generate specifications, run them in parallel without the dashboard,
        and assert that every expected output specification yielded a result."""
        # Nested num_calls: each generated spec receives the full [10, 20, 30]
        specifications = SpecificationGenerator().generate({
            "seed": [1, 2, 3, 4, 5, 6, 7, 8],
            "num_calls": [[10, 20, 30]],
        })

        # Flat num_calls: the expected seed x num_calls cross product
        output_specifications = SpecificationGenerator().generate({
            "seed": [1, 2, 3, 4, 5, 6, 7, 8],
            "num_calls": [10, 20, 30],
        })

        name = "test"
        # Parallel run with the dashboard disabled so exceptions surface directly
        ExperimentRunner().run(name,
                               specifications,
                               SimpleExperiment(),
                               specification_runner=MultiprocessingRunner(),
                               use_dashboard=False,
                               propagate_exceptions=True)

        # Remove each specification that produced a result; nothing should remain
        for result in experiment_iterator(name):
            if result["result"] != []:
                output_specifications.remove(result["specification"])
        self.assertEqual([], output_specifications)
Example #3
0
    )
    # Debug aid: print the specification(s) using seed 0 before launching
    for spec in specifications:
        if spec["seed"] == 0:
            print(spec)

    runner = ExperimentRunner()
    # Load the map into shared memory before forking workers
    # (presumably so children inherit it -- confirm map_memory's contract)
    map_memory(base_specs["file"], base_specs["state_space_dimensionality"])
    DEBUG = False

    if DEBUG:
        # Single-process run (MainRunner) with exceptions propagated: easiest to debug
        runner.run(name,
                   specifications,
                   PlanningExperiment(),
                   propagate_exceptions=True,
                   specification_runner=MainRunner(),
                   use_dashboard=False,
                   force_pickle=True,
                   context_type="fork")
    else:
        # Parallel production run with the dashboard.
        # NOTE(review): `resources` (gpu x job slots) is built here but never
        # passed to runner.run in this snippet -- verify whether it should be wired in.
        gpus = 4
        jobs_per_gpu = 2
        resources = list(product(list(range(gpus)), list(range(jobs_per_gpu))))
        runner.run(name,
                   specifications,
                   PlanningExperiment(),
                   propagate_exceptions=False,
                   specification_runner=MultiprocessingRunner(),
                   context_type="fork",
                   use_dashboard=True,
                   force_pickle=True)
Example #4
0
    def run(self, name: typing.AnyStr, specifications: typing.List[Specification], experiment: ExperimentBase,
            continue_from_last_run=True, propagate_exceptions=False,
            force_pickle=False, specification_runner: SimpleAbstractRunner = MultiprocessingRunner(),
            use_dashboard=True, context_type="fork", multiprocessing_lib=None,save_every_k=None) -> typing.NoReturn:

        """
        The method called to run an experiment batch.

        :param name: The name of this experiment batch
        :param specifications: The list of specifications to run. Should be a list of dictionaries. Each dictionary is passed to the experiment run method
        :param experiment: The experiment object to run
        :param continue_from_last_run: If True, will not redo already completed experiments. Defaults to True
        :param propagate_exceptions: If True, exceptions won't be caught and logged as failed experiments but will cause the program to crash (like normal), useful for debugging experiments
        :param force_pickle: If True, don't attempt to json serialize results and default to pickling
        :param specification_runner: An instance of ``AbstractRunner`` used to run the specifications; passing None falls back to ``JoblibRunner(None)``
        :param use_dashboard: If True, use the terminal monitoring dashboard. If False, just stream logs to stdout
        :param context_type: Multiprocessing start method ("fork", "spawn", ...). File logging is only attached under "fork"
        :param multiprocessing_lib: Optional multiprocessing-compatible module to use instead of the stdlib ``multiprocessing``
        :param save_every_k: Accepted for API compatibility; not referenced in this method body -- TODO confirm intended use
        :return: No return

        .. note:: The default ``specification_runner`` instance is created once at
           definition time and shared across calls to this method.
        """
        if multiprocessing_lib is None:
            import multiprocessing as mp
        else:
            mp = multiprocessing_lib
        ctx = mp.get_context(context_type)
        # Bug fix: substitute the default runner *before* calling methods on it.
        # The original called set_multiprocessing_context first, which raised
        # AttributeError whenever specification_runner was explicitly None.
        if specification_runner is None:
            specification_runner = JoblibRunner(None)
        specification_runner.set_multiprocessing_context(ctx)
        dashboard_process = None
        try:
            manager = ctx.Manager()
            eventQueue = manager.Queue(maxsize=2000)
            put_in_event_queue(eventQueue, StartExperimentEvent(name))
            # Set up root smallab logger; each batch logs under a timestamped folder
            folder_loc = os.path.join("experiment_runs", name, "logs", str(datetime.datetime.now()))
            file_loc = os.path.join(folder_loc, "main.log")
            if not os.path.exists(folder_loc):
                os.makedirs(folder_loc)
            logger = logging.getLogger("smallab")
            logger.setLevel(logging.DEBUG)
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            # Can't share a file handler with non-fork multiprocessing
            if context_type == "fork":
                fh = logging.FileHandler(file_loc)
                fh.setFormatter(formatter)
                logger.addHandler(fh)
            if not use_dashboard:
                # No dashboard: stream log records straight to stderr
                sh = logging.StreamHandler()
                sh.setFormatter(formatter)
                logger.addHandler(sh)
            else:
                # Dashboard consumes events from eventQueue in a separate process
                dashboard_process = ctx.Process(target=start_dashboard, args=(eventQueue,))
                dashboard_process.start()
            experiment.set_logging_folder(folder_loc)

            self.force_pickle = force_pickle
            if not os.path.exists(get_save_directory(name)):
                os.makedirs(get_save_directory(name))

            if continue_from_last_run:
                need_to_run_specifications = self._find_uncompleted_specifications(name, specifications)
            else:
                need_to_run_specifications = specifications
            for callback in self.callbacks:
                callback.set_experiment_name(name)

            # Announce every pending specification so the dashboard can track it
            for specification in need_to_run_specifications:
                put_in_event_queue(eventQueue, RegisterEvent(specification_hash(specification), specification))
            # Simple runners receive a per-specification closure; complex runners
            # receive the raw pieces and manage saving themselves
            if isinstance(specification_runner, SimpleAbstractRunner):
                specification_runner.run(need_to_run_specifications,
                                         lambda specification: run_and_save(name, experiment, specification,
                                                                            propagate_exceptions, self.callbacks,
                                                                            self.force_pickle, eventQueue))
            elif isinstance(specification_runner, ComplexAbstractRunner):
                specification_runner.run(need_to_run_specifications, name, experiment, propagate_exceptions,
                                         self.callbacks, self.force_pickle, eventQueue)

            self._write_to_completed_json(name, specification_runner.get_completed(),
                                          specification_runner.get_failed_specifications())

            # Call batch completion/failure callbacks
            if specification_runner.get_exceptions() != []:
                for callback in self.callbacks:
                    callback.on_batch_failure(specification_runner.get_exceptions(),
                                              specification_runner.get_failed_specifications())

            if specification_runner.get_completed() != []:
                for callback in self.callbacks:
                    callback.on_batch_complete(specification_runner.get_completed())
        finally:
            # Always tear down the dashboard process, even when the run fails
            if dashboard_process is not None:
                dashboard_process.terminate()
        # NOTE(review): this fragment belongs to a checkpointed experiment's step
        # method (it reads self.i / self.num_calls / self.r) -- the enclosing def
        # is not visible in this chunk; confirm against the original example.
        # This experiment can have a random transient failure (~10% of calls)!
        # Since it's checkpointed, it will likely succeed after running it again.
        if random.randint(0,100) > 90:
            raise Exception("Something bad happened, a moth flew into the computer!")
        if self.i >= self.num_calls:
            # Done with the experiment: return the results dictionary like normal
            return {"number": self.r}
        else:
            # Not done yet: return (current, total) so the dashboard can show progress
            return (self.i, self.num_calls)

    # Tells the experiment framework how many iterations this will experiment run for
    def max_iterations(self, specification):
        """Tell the experiment framework how many iterations this experiment runs for."""
        total_calls = specification["num_calls"]
        return total_calls


# Build the cross product of 8 seeds x three num_calls values
generation_specification = {"seed": [1, 2, 3, 4, 5, 6, 7, 8], "num_calls": [100, 200, 300]}
specifications = SpecificationGenerator().generate(generation_specification)

name = "checkpointed_run"
# Run all the specifications in parallel
runner = ExperimentRunner()
runner.run(name, specifications, SimpleExperiment(),specification_runner=MultiprocessingRunner())

# Some experiments may have failed transiently; calling run again retries only
# the uncompleted ones (continue_from_last_run defaults to True)
runner.run(name, specifications, SimpleExperiment(),specification_runner=MultiprocessingRunner())

# Cleanup: remove this batch's experiment_runs folder
delete_experiments_folder(name)
Example #6
0
                    print(results["specification"]["seed"])
                    print(results["result"]["number"])

    # If you have an experiment you want to run on a lot of computers you can use the
    # MultiComputerGenerator: assign each computer a number 0..number_of_computers-1
    # and it gives each computer every number_of_computers-th specification.
    from smallab.specification_generator import MultiComputerGenerator

    all_specifications = SpecificationGenerator().from_json_file('test.json')

    # Two "computers" splitting the same specification file in half
    g1 = MultiComputerGenerator(0, 2)
    g2 = MultiComputerGenerator(1, 2)
    specifications_1 = g1.from_json_file("test.json")
    specifications_2 = g2.from_json_file("test.json")

    # Together the two partitions account for every specification
    assert len(specifications_1) + len(specifications_2) == len(all_specifications)

    # Need to freeze the dicts (as frozensets of items) to do set manipulation on them
    specifications_1 = set([frozenset(sorted(x.items())) for x in specifications_1])
    specifications_2 = set([frozenset(sorted(x.items())) for x in specifications_2])
    all_specifications = set([frozenset(sorted(x.items())) for x in all_specifications])

    # The generator produces two disjoint sets of specifications...
    assert specifications_1.isdisjoint(specifications_2)
    # ...that together make up the whole specification set
    assert specifications_1.union(specifications_2) == all_specifications

    # You can use the provided logging callbacks to log completion and failure
    # of specific specifications

    runner.run('with_logging', SpecificationGenerator().from_json_file("test.json"), SimpleExperiment(),
               continue_from_last_run=True, specification_runner=MultiprocessingRunner())
            # NOTE(review): fragment of an overlapping-output checkpointed
            # experiment's step method -- the enclosing if/def are not visible here.
            # Record how far we got so the returned specification reflects progress
            specification["num_calls"] = self.i
            result = {"r": self.r}
            progress = self.i
            max_iterations = self.num_calls
            return OverlappingOutputCheckpointedExperimentReturnValue(should_continue, specification, result, progress,
                                                                      max_iterations)
        else:
            # Not done yet: return (current, total) so the dashboard can show progress
            return (self.i, self.num_calls)
    #Tells the dashboard how many iterations this experiment will run for
    def max_iterations(self, specification):
        """Report to the dashboard how many iterations this experiment will run for."""
        num_calls = specification["num_calls"]
        return num_calls


# Same shape as the other examples, but note num_calls is a tuple here rather
# than a list -- presumably handled the same by the generator; verify if intended
generation_specification = {"seed": [1, 2, 3, 4, 5, 6, 7, 8], "num_calls": (10, 20, 30)}
specifications = SpecificationGenerator().generate(generation_specification)

name = "overlapping_checkpointed_run"
# Run every specification in parallel without the dashboard
runner = ExperimentRunner()
runner.run(name, specifications, SimpleExperiment(), specification_runner=MultiprocessingRunner(), use_dashboard=False,
           propagate_exceptions=True)

# Some experiments may have failed transiently; rerunning retries only the
# uncompleted specifications
runner.run(name, specifications, SimpleExperiment(), specification_runner=MultiprocessingRunner(), use_dashboard=False,
           propagate_exceptions=True)

# Cleanup: remove this batch's experiment_runs folder
delete_experiments_folder(name)
Example #8
0
                                 "seed": list(range(5)),
                                 "alpha_param": 6,
                                 "beta_param":1,
                                 "epsilon": 10,
                                 "delta": 0.1,
                                 "plan_commitment_algorithm": "n_steps",
                                 "plan_threshold": [1],
                                 "sample_observations": False,
                                 "use_expected_improvement":False,
                                 "planning_steps": 200
                                 }



    ## Create shared memory for the map so forked workers can access it
    map_memory(generation_specifications["file"], generation_specifications["state_space_dimensionality"])

    specifications = SpecificationGenerator().generate(generation_specifications)
    runner = ExperimentRunner()
    # Toggle between single-process debugging and the parallel production run
    DEBUG = False


    if DEBUG:
        # MainRunner executes in-process with exceptions propagated -- easy to debug
        runner.run(name, specifications, PlanningExperiment(), propagate_exceptions=True,
                   specification_runner=MainRunner(), use_dashboard=False, force_pickle=True, context_type="fork")
    else:

        # Parallel run with the dashboard; failures are logged rather than raised
        runner.run(name, specifications, PlanningExperiment(), propagate_exceptions=False,
                   specification_runner=MultiprocessingRunner(), context_type="fork", use_dashboard=True,
                   force_pickle=True)