Esempio n. 1
0
def main(mode, workflow, problem, run, **kwargs):
    """Build the Balsam job for a DH search and submit it to the queue.

    ``mode`` picks how the job is configured: ``"nas"`` uses ``setup_nas``
    and ``"hps"`` uses ``setup_ambs``; any other value skips the
    configuration step. The scheduler settings ``project``, ``queue``,
    ``nodes``, ``time_minutes`` and ``job_mode`` are read from ``kwargs``
    (a missing one raises ``KeyError``).
    """
    # pre_submit receives the values exactly as given by the caller.
    job = pre_submit(problem, run, workflow)

    # Values that name an existing filesystem entry are made absolute;
    # everything else is forwarded unchanged.
    problem = os.path.abspath(problem) if os.path.exists(problem) else problem
    run = os.path.abspath(run) if os.path.exists(run) else run

    if mode == "nas":
        announcement, configure = "Creating NAS(PPO) BalsamJob...", setup_nas
    elif mode == "hps":
        announcement, configure = "Creating HPS(AMBS) BalsamJob...", setup_ambs
    else:
        announcement, configure = None, None

    if configure is not None:
        # No newline: "OK" is appended to the same line once setup returns.
        print(announcement, end="", flush=True)
        configure(job, problem, run, **kwargs)
        print("OK")

    print("Performing job submission...")
    scheduler_keys = ("project", "queue", "nodes", "time_minutes", "job_mode")
    submit_qlaunch(*[kwargs[key] for key in scheduler_keys], workflow)
    banner(f"Success. The search will run at: {job.working_directory}")
Esempio n. 2
0
    def __init__(
        self,
        problem: str,
        run: str,
        evaluator: str,
        max_evals: int = 1000000,
        seed: int = None,
        num_nodes_master: int = 1,
        num_workers: int = None,
        log_dir: str = None,
        **kwargs,
    ):
        """Load the problem and run function, then create the evaluator.

        Args:
            problem: Location of the problem, resolved with
                ``util.generic_loader(problem, "Problem")``.
            run: Location of the run function, resolved with
                ``util.generic_loader(run, "run")``.
            evaluator: Evaluation method name, passed as ``method`` to
                ``Evaluator.create``.
            max_evals: Stored on ``self.max_evals`` and forwarded to the
                evaluator through ``kwargs``.
            seed: Random seed used when the loaded problem has no seed of
                its own; otherwise the problem's seed takes precedence.
            num_nodes_master: Forwarded to ``Evaluator.create``.
            num_workers: Forwarded to ``Evaluator.create``.
            log_dir: Directory stored on ``self.log_dir``; defaults to the
                current working directory.
            **kwargs: Extra options forwarded to ``Evaluator.create``.
        """
        # The named arguments are mirrored into kwargs so that they reach
        # Evaluator.create together with the extra options.
        kwargs["problem"] = problem
        kwargs["run"] = run
        kwargs["evaluator"] = evaluator
        kwargs["max_evals"] = max_evals  # * For retro compatibility
        kwargs["seed"] = seed

        # Loading problem instance and run function
        self.problem = util.generic_loader(problem, "Problem")
        if self.problem.seed is None:
            self.problem.seed = seed
        else:
            # A seed defined by the problem overrides the one passed in.
            kwargs["seed"] = self.problem.seed
        self.run_func = util.generic_loader(run, "run")

        notice = f"Maximizing the return value of function: {run}"
        logger.info(notice)
        util.banner(notice)

        self.evaluator = Evaluator.create(
            self.run_func,
            method=evaluator,
            num_nodes_master=num_nodes_master,
            num_workers=num_workers,
            **kwargs,
        )
        # The evaluator decides the effective worker count.
        self.num_workers = self.evaluator.num_workers
        self.max_evals = max_evals
        self.log_dir = os.getcwd() if log_dir is None else log_dir

        # set the random seed
        np.random.seed(self.problem.seed)

        logger.info("Options: " + pformat(kwargs, indent=4))
        logger.info("Hyperparameter space definition: " +
                    pformat(self.problem.space, indent=4))
        logger.info(f"Created {evaluator} evaluator")
        logger.info(f"Evaluator: num_workers is {self.num_workers}")
Esempio n. 3
0
    def __init__(self,
                 problem: str,
                 run: str,
                 evaluator: str,
                 max_evals: int = 100,
                 seed: int = None,
                 **kwargs):
        """Load the problem and run function, then create the evaluator.

        Args:
            problem: Location of the problem, resolved with
                ``util.generic_loader(problem, 'Problem')``.
            run: Location of the run function, resolved with
                ``util.generic_loader(run, 'run')``.
            evaluator: Evaluation method name, passed as ``method`` to
                ``Evaluator.create``.
            max_evals: Stored on ``self.max_evals`` and forwarded to the
                evaluator through ``kwargs``.
            seed: Random seed used when the loaded problem has no seed of
                its own; otherwise the problem's seed takes precedence.
            **kwargs: Extra options forwarded to ``Evaluator.create``.
        """
        # The named arguments are mirrored into kwargs so that they reach
        # Evaluator.create together with the extra options.
        kwargs['problem'] = problem
        kwargs['run'] = run
        kwargs['evaluator'] = evaluator
        kwargs['max_evals'] = max_evals  # * For retro compatibility
        kwargs['seed'] = seed

        self.problem = util.generic_loader(problem, 'Problem')
        if self.problem.seed is None:
            self.problem.seed = seed
        else:
            # A seed defined by the problem overrides the one passed in.
            kwargs['seed'] = self.problem.seed
        self.run_func = util.generic_loader(run, 'run')
        notice = f'Maximizing the return value of function: {run}'
        logger.info(notice)
        util.banner(notice)

        self.evaluator = Evaluator.create(self.run_func,
                                          method=evaluator,
                                          **kwargs)
        # The evaluator decides the effective worker count.
        self.num_workers = self.evaluator.num_workers
        self.max_evals = max_evals

        # set the random seed
        np.random.seed(self.problem.seed)

        logger.info('Options: ' + pformat(kwargs, indent=4))
        logger.info('Hyperparameter space definition: ' +
                    pformat(self.problem.space, indent=4))
        logger.info(f'Created {evaluator} evaluator')
        logger.info(f'Evaluator: num_workers is {self.num_workers}')
Esempio n. 4
0
def main(
    mode,
    search,
    workflow,
    problem,
    run,
    time_minutes,
    nodes,
    queue,
    project,
    max_evals,
    num_cpus_per_task,
    num_gpus_per_task,
    activation_script,
    horovod,
    **kwargs,
):
    """Create a Ray-based submission script for the DH search and ``qsub`` it.

    The function renders the host-specific templates found under
    ``job-templates-ray``, writes ``sub_<workflow>.sh`` inside the workflow
    directory (created if missing), marks it executable and submits it.

    Side effects: the process working directory is changed to the workflow
    directory and is not restored.

    Args:
        mode, search, problem, run, time_minutes, queue, project, max_evals,
        num_cpus_per_task, num_gpus_per_task: Forwarded as-is to the
            submission template.
        workflow: Name of the experiment directory, created under the
            current working directory; also used in the script file name.
        nodes: Forwarded to the template; a value greater than 1 selects the
            multi-node Ray cluster template, otherwise the single-node one.
        activation_script: Path made absolute before being forwarded to the
            template.
        horovod: When truthy the ``"rayhorovod"`` evaluator is used,
            otherwise ``"ray"``.
        **kwargs: Converted with ``generate_other_arguments`` and forwarded
            to the template as ``other_search_arguments``.

    Raises:
        SystemExit: With a non-zero status when the current host has no
            submission policy or when ``qsub`` cannot be run or fails.
    """
    # Imported locally so this function does not rely on the module-level
    # import list.
    import subprocess
    import sys

    activation_script = os.path.abspath(activation_script)

    # Test if "run", "problem" and "workflow" are correct
    validate(problem, run, workflow)

    # Detection of the host
    hostname = socket.gethostname()
    if "thetagpu" in hostname:
        host = "thetagpu"
        print("ThetaGPU detected")
    else:
        print(
            f"There exist no submission policy for the current system: '{hostname}'"
        )
        # Non-zero status so that calling scripts can detect the failure.
        sys.exit(1)

    # Load submission template
    job_template_path = os.path.join(MODULE_PATH, "job-templates-ray",
                                     f"{host}.submission.tmpl")

    with open(job_template_path, "r") as f:
        template = Template(f.read())

    # Load script to launch ray cluster template
    if nodes > 1:  # multiple nodes
        launch_ray_path = os.path.join(MODULE_PATH, "job-templates-ray",
                                       f"{host}.MultiNodesRayCluster.tmpl")
    else:  # single node
        launch_ray_path = os.path.join(MODULE_PATH, "job-templates-ray",
                                       f"{host}.SingleNodeRayCluster.tmpl")

    with open(launch_ray_path, "r") as f:
        template_launch_ray = Template(f.read())

    # Render script to launch ray cluster
    script_launch_ray_cluster = template_launch_ray.render()

    # Create workflow directory and move to it
    current_dir = os.getcwd()
    exp_dir = os.path.join(current_dir, workflow)
    if not os.path.exists(exp_dir):
        create_dir(exp_dir)
    os.chdir(exp_dir)

    # resolve the evaluator to use
    evaluator = "rayhorovod" if horovod else "ray"

    # Render submission template
    submission_path = os.path.join(exp_dir, f"sub_{workflow}.sh")
    with open(submission_path, "w") as fp:
        fp.write(
            template.render(
                mode=mode,
                search=search,
                evaluator=evaluator,
                problem=problem,
                run=run,
                time_minutes=time_minutes,
                nodes=nodes,
                queue=queue,
                project=project,
                max_evals=max_evals,
                num_cpus_per_task=num_cpus_per_task,
                num_gpus_per_task=num_gpus_per_task,
                script_launch_ray_cluster=script_launch_ray_cluster,
                activation_script=activation_script,
                other_search_arguments=generate_other_arguments(**kwargs),
            ))
        print("Created", fp.name)

    # add executable rights
    st = os.stat(submission_path)
    os.chmod(submission_path, st.st_mode | stat.S_IEXEC)

    # Job submission
    print("Performing job submission...")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    try:
        completed = subprocess.run(["qsub", submission_path])
    except FileNotFoundError:
        print("Job submission failed: the 'qsub' command could not be run")
        sys.exit(1)
    if completed.returncode != 0:
        print(
            f"Job submission failed: qsub exited with status {completed.returncode}"
        )
        sys.exit(completed.returncode)

    banner(f"Success. The search will run at: {exp_dir}")