コード例 #1
0
ファイル: bohb.py プロジェクト: jakeyoo87/DRL2021-Project
def run_bohb(runtime, b, cs):
    """Run BOHB with a single background worker on configuration space ``cs``.

    A local nameserver is started on a free port, one ``MyWorker`` wrapping
    ``b`` is launched in the background, and BOHB is run for 300 iterations
    with budgets between 4 and 108.

    Args:
        runtime: Not used by this function; kept so the signature stays
            compatible with existing callers.
        b: Object handed to ``MyWorker`` as its ``b`` argument.
        cs: Configuration space BOHB samples from.

    Returns:
        The object returned by ``BOHB.run``. (Earlier versions discarded it
        and returned ``None``.)
    """
    min_budget = 4
    max_budget = 108
    hb_run_id = '0'
    num_workers = 1
    n_iters = 300

    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    try:
        workers = []
        for i in range(num_workers):
            w = MyWorker(b=b, run_id=hb_run_id, id=i,
                         nameserver=ns_host, nameserver_port=ns_port)
            w.run(background=True)
            workers.append(w)

        bohb = BOHB(configspace=cs, run_id=hb_run_id,
                    min_budget=min_budget, max_budget=max_budget,
                    nameserver=ns_host, nameserver_port=ns_port,
                    ping_interval=10, min_bandwidth=0.3)
        try:
            return bohb.run(n_iters, min_n_workers=num_workers)
        finally:
            # Always stop the optimizer and its workers, even if run() raised.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # The nameserver must be released last, and on every exit path.
        NS.shutdown()
コード例 #2
0
ファイル: wrapper.py プロジェクト: bornabesic/bohby
def optimize_hyperparameters(model_class,
                             parameters,
                             train_and_validate_fn,
                             num_iterations,
                             min_budget=0.01,
                             working_dir="./bohby_workspace/",
                             max_budget=1):
    """Optimize hyperparameters with BOHB and return the incumbent.

    Args:
        model_class: Passed through to ``WrapWorker``.
        parameters: Parameter description turned into a configuration space
            by ``generate_configspace``.
        train_and_validate_fn: Passed through to ``WrapWorker``.
        num_iterations: Number of BOHB iterations to run.
        min_budget: Smallest budget handed to BOHB.
        working_dir: Directory used by the nameserver, the worker and the
            live JSON result logger; created if missing.
        max_budget: Largest budget handed to BOHB (defaults to the previously
            hard-coded value of 1).

    Returns:
        Tuple ``(val_loss, incumbent_config)`` where ``val_loss`` is the loss
        of the last logged run of the incumbent configuration.
    """
    # Make sure the working directory exists
    os.makedirs(working_dir, exist_ok=True)

    # Generate a configspace from the given parameters
    config_space = generate_configspace(parameters)

    # Start a local nameserver for communication
    NS = hpns.NameServer(run_id=_runid,
                         nic_name="lo",
                         working_directory=working_dir)
    ns_host, ns_port = NS.start()
    try:
        # Define the worker
        worker = WrapWorker(model_class,
                            train_and_validate_fn,
                            working_directory=working_dir,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            run_id=_runid)
        worker.run(background=True)

        # Enable live logging so a run can be canceled at any time and the
        # results can still be recovered from disk
        result_logger = json_result_logger(directory=working_dir,
                                           overwrite=True)

        # Optimization
        bohb = BOHB(configspace=config_space,
                    working_directory=working_dir,
                    run_id=_runid,
                    eta=2,
                    min_budget=min_budget,
                    max_budget=max_budget,
                    host=ns_host,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    ping_interval=3600,
                    result_logger=result_logger)
        try:
            # The in-memory result is not needed: the logged results are
            # re-read from working_dir below.
            bohb.run(n_iterations=num_iterations)
        finally:
            # Clean up the optimizer and worker even if run() raised
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    # Best found config, taken from the results logged to disk
    run_results = hpres.logged_results_to_HB_result(working_dir)
    id2conf = run_results.get_id2config_mapping()

    incumbent_id = run_results.get_incumbent_id()
    incumbent_config = id2conf[incumbent_id]['config']
    incumbent_runs = run_results.get_runs_by_id(incumbent_id)

    val_loss = incumbent_runs[-1].loss

    return val_loss, incumbent_config
コード例 #3
0
def main(xargs, api):
  """Run BOHB architecture search against ``api`` and report the best arch.

  Args:
    xargs: Parsed arguments; this function reads ``rand_seed``,
      ``search_space``, ``dataset``, ``num_samples``, ``random_fraction``,
      ``bandwidth_factor``, ``min_bandwidth`` and ``n_iters``.
    api: Benchmark API object; it is reset, handed to the worker and queried
      for architecture indices and info strings.

  Returns:
    Tuple ``(logger.log_dir, current_best_index, total_times)`` where
    ``current_best_index[k]`` is the index of the best architecture among the
    first ``k + 1`` trajectory entries of the first worker.

  Raises:
    ValueError: If the worker trajectory is empty after the run.
  """
  torch.set_num_threads(4)
  prepare_seed(xargs.rand_seed)
  # Use the argument that was passed in rather than a module-level `args`,
  # so the function also works when it is not called from the script body.
  logger = prepare_logger(xargs)

  logger.log('{:} use api : {:}'.format(time_string(), api))
  api.reset_time()
  search_space = get_search_spaces(xargs.search_space, 'nats-bench')
  if xargs.search_space == 'tss':
    cs = get_topology_config_space(search_space)
    config2structure = config2topology_func()
  else:
    cs = get_size_config_space(search_space)
    config2structure = config2size_func(search_space)

  hb_run_id = '0'
  num_workers = 1

  NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
  ns_host, ns_port = NS.start()
  try:
    workers = []
    for i in range(num_workers):
      w = MyWorker(nameserver=ns_host, nameserver_port=ns_port, convert_func=config2structure, dataset=xargs.dataset, api=api, run_id=hb_run_id, id=i)
      w.run(background=True)
      workers.append(w)

    bohb = BOHB(configspace=cs, run_id=hb_run_id,
        eta=3, min_budget=1, max_budget=12,
        nameserver=ns_host,
        nameserver_port=ns_port,
        num_samples=xargs.num_samples,
        random_fraction=xargs.random_fraction, bandwidth_factor=xargs.bandwidth_factor,
        ping_interval=10, min_bandwidth=xargs.min_bandwidth)
    try:
      bohb.run(xargs.n_iters, min_n_workers=num_workers)
    finally:
      # Stop optimizer and workers on every exit path.
      bohb.shutdown(shutdown_workers=True)
  finally:
    NS.shutdown()

  # Each trajectory entry is indexed as (score, arch). Track the running
  # best in one pass; a strict '>' keeps the earliest entry on ties, which is
  # what max() over each prefix would select.
  trajectory = workers[0].trajectory
  current_best_index = []
  best = None
  for entry in trajectory:
    if best is None or entry[0] > best[0]:
      best = entry
    current_best_index.append(api.query_index_by_arch(best[1]))

  if best is None:
    raise ValueError('the worker trajectory is empty; no architecture was evaluated')
  best_arch = best[1]
  logger.log('Best found configuration: {:} within {:.3f} s'.format(best_arch, workers[0].total_times[-1]))
  info = api.query_info_str_by_arch(best_arch, '200' if xargs.search_space == 'tss' else '90')
  logger.log('{:}'.format(info))
  logger.log('-'*100)
  logger.close()

  return logger.log_dir, current_best_index, workers[0].total_times
コード例 #4
0
def main(xargs, nas_bench):
    """Run BOHB over the cell search space and look the incumbent up in ``nas_bench``.

    Args:
        xargs: Parsed arguments; this function reads ``workers``,
            ``rand_seed``, ``dataset``, ``data_path``, ``arch_nas_dataset``,
            ``search_space_name``, ``max_nodes``, ``time_budget``,
            ``num_samples``, ``random_fraction``, ``bandwidth_factor``,
            ``min_bandwidth`` and ``n_iters``.
        nas_bench: Benchmark API object handed to the worker and queried for
            the best architecture.

    Returns:
        Tuple ``(logger.log_dir, index_of_best_arch, real_cost_time)`` where
        ``real_cost_time`` is the wall-clock time from just before BOHB is
        constructed until after the nameserver has been shut down.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use the argument that was passed in rather than a module-level `args`.
    logger = prepare_logger(xargs)

    if xargs.dataset == "cifar10":
        dataname = "cifar10-valid"
    else:
        dataname = xargs.dataset
    if xargs.data_path is not None:
        train_data, valid_data, xshape, class_num = get_datasets(
            xargs.dataset, xargs.data_path, -1)
        split_Fpath = "configs/nas-benchmark/cifar-split.txt"
        cifar_split = load_config(split_Fpath, None, None)
        train_split, valid_split = cifar_split.train, cifar_split.valid
        logger.log("Load split file from {:}".format(split_Fpath))
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, {
            "class_num": class_num,
            "xshape": xshape
        }, logger)
        # To split data: the validation set is a copy of the training data
        # that uses the validation transform.
        train_data_v2 = deepcopy(train_data)
        train_data_v2.transform = valid_data.transform
        valid_data = train_data_v2
        search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                    valid_split)
        # data loader
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        logger.log(
            "||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
            .format(xargs.dataset, len(train_loader), len(valid_loader),
                    config.batch_size))
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        # NOTE: extra_info (and search_data above) are built but not consumed
        # by anything visible in this function.
        extra_info = {
            "config": config,
            "train_loader": train_loader,
            "valid_loader": valid_loader,
        }
    else:
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, None, logger)
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": None,
            "valid_loader": None
        }

    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces("cell", xargs.search_space_name)
    cs = get_configuration_space(xargs.max_nodes, search_space)

    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = "0"
    num_workers = 1

    NS = hpns.NameServer(run_id=hb_run_id, host="localhost", port=0)
    ns_host, ns_port = NS.start()
    try:
        workers = []
        for i in range(num_workers):
            w = MyWorker(
                nameserver=ns_host,
                nameserver_port=ns_port,
                convert_func=config2structure,
                dataname=dataname,
                nas_bench=nas_bench,
                time_budget=xargs.time_budget,
                run_id=hb_run_id,
                id=i,
            )
            w.run(background=True)
            workers.append(w)

        start_time = time.time()
        bohb = BOHB(
            configspace=cs,
            run_id=hb_run_id,
            eta=3,
            min_budget=12,
            max_budget=200,
            nameserver=ns_host,
            nameserver_port=ns_port,
            num_samples=xargs.num_samples,
            random_fraction=xargs.random_fraction,
            bandwidth_factor=xargs.bandwidth_factor,
            ping_interval=10,
            min_bandwidth=xargs.min_bandwidth,
        )
        try:
            results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
        finally:
            # Stop optimizer and workers on every exit path.
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    real_cost_time = time.time() - start_time

    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()
    logger.log("Best found configuration: {:} within {:.3f} s".format(
        id2config[incumbent]["config"], real_cost_time))
    best_arch = config2structure(id2config[incumbent]["config"])

    info = nas_bench.query_by_arch(best_arch, "200")
    if info is None:
        logger.log("Did not find this architecture : {:}.".format(best_arch))
    else:
        logger.log("{:}".format(info))
    logger.log("-" * 100)

    logger.log("workers : {:.1f}s with {:} archs".format(
        workers[0].time_budget, len(workers[0].seen_archs)))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(
        best_arch), real_cost_time
コード例 #5
0
                    seed=args.seed,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    run_id=args.run_id)
    worker.run(background=True)

    #instantiate BOHB and run it
    result_logger = hputil.json_result_logger(directory=args.working_directory,
                                              overwrite=True)

    HPB = BOHB(configspace=worker.get_config_space(),
               working_directory=args.working_directory,
               run_id=args.run_id,
               eta=eta,
               min_budget=min_budget,
               max_budget=max_budget,
               host=ns_host,
               nameserver=ns_host,
               nameserver_port=ns_port,
               ping_interval=3600,
               result_logger=result_logger)

    res = HPB.run(n_iterations=args.num_iterations,
                  min_n_workers=args.total_num_workers)

    with open(os.path.join(args.working_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    HPB.shutdown(shutdown_workers=True)
    NS.shutdown()
コード例 #6
0
# Launch `num_workers` workers in background mode; each one is given the
# nameserver address and the shared run id.
workers = []
for i in range(num_workers):
    w = MyWorker(nameserver=ns_host,
                 nameserver_port=ns_port,
                 run_id=hb_run_id,
                 id=i)
    w.run(background=True)
    workers.append(w)

# Build the BOHB optimizer; its model hyperparameters come from the
# command-line arguments.
bohb = BOHB(
    configspace=cs,
    run_id=hb_run_id,
    eta=3,
    min_budget=min_budget,
    max_budget=max_budget,
    nameserver=ns_host,
    nameserver_port=ns_port,
    # optimization_strategy=args.strategy,
    num_samples=args.num_samples,
    random_fraction=args.random_fraction,
    bandwidth_factor=args.bandwidth_factor,
    ping_interval=10,
    min_bandwidth=args.min_bandwidth)

# Run for args.n_iters iterations, requiring `num_workers` workers.
results = bohb.run(args.n_iters, min_n_workers=num_workers)

# Tear down the optimizer (including its workers) and then the nameserver.
bohb.shutdown(shutdown_workers=True)
NS.shutdown()
# NOTE(review): presumably gives the background threads time to finish
# shutting down before results are read — confirm.
time.sleep(5)

# For the three nas_cifar10 benchmark variants, fetch the results from the
# benchmark object `b`, ignoring invalid configurations.
if args.benchmark == "nas_cifar10a" or args.benchmark == "nas_cifar10b" or args.benchmark == "nas_cifar10c":
    res = b.get_results(ignore_invalid_configs=True)
コード例 #7
0
                      nameserver_port=ns_port,
                      run_id=hb_run_id,
                      id=0)
    evaluator = UltraoptEvaluator(data, 'balanced_accuracy')
    worker.evaluator = evaluator
    worker.run(background=True)
    HDL = get_no_ordinal_HDL()
    CS = hdl2cs(HDL)
    CS.seed(trial * 10 + 5)
    bohb = BOHB(
        configspace=CS,
        run_id=hb_run_id,
        # just test KDE
        eta=2,
        min_budget=1,
        max_budget=1,
        nameserver=ns_host,
        nameserver_port=ns_port,
        num_samples=64,
        random_fraction=33,
        bandwidth_factor=3,
        ping_interval=10,
        min_bandwidth=.3)

    results = bohb.run(max_iter, min_n_workers=num_workers)

    bohb.shutdown(shutdown_workers=True)
    res[f"trial-{trial}"] = evaluator.losses
NS.shutdown()
time.sleep(1)
res = raw2min(res)
m = res.mean(1)
コード例 #8
0
def main(xargs, nas_bench):
    """Run BOHB over the cell search space on CIFAR-10 and report the incumbent.

    Args:
        xargs: Parsed arguments; this function reads ``workers``,
            ``rand_seed``, ``dataset``, ``data_path``, ``arch_nas_dataset``,
            ``search_space_name``, ``max_nodes``, ``num_samples``,
            ``random_fraction``, ``bandwidth_factor``, ``min_bandwidth`` and
            ``n_iters``.
        nas_bench: Benchmark API object handed to the worker and queried for
            the best architecture.

    Returns:
        Tuple ``(logger.log_dir, index_of_best_arch)``.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use the argument that was passed in rather than a module-level `args`.
    logger = prepare_logger(xargs)

    assert xargs.dataset == 'cifar10', 'currently only support CIFAR-10'
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
    cifar_split = load_config(split_Fpath, None, None)
    train_split, valid_split = cifar_split.train, cifar_split.valid
    logger.log('Load split file from {:}'.format(split_Fpath))
    config_path = 'configs/nas-benchmark/algos/R-EA.config'
    config = load_config(config_path, {
        'class_num': class_num,
        'xshape': xshape
    }, logger)
    # To split data: the validation set is a copy of the training data that
    # uses the validation transform.
    train_data_v2 = deepcopy(train_data)
    train_data_v2.transform = valid_data.transform
    valid_data = train_data_v2
    search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                valid_split)
    # data loader
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
        num_workers=xargs.workers,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
        num_workers=xargs.workers,
        pin_memory=True)
    logger.log(
        '||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'
        .format(xargs.dataset, len(train_loader), len(valid_loader),
                config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))
    # NOTE: extra_info (and search_data above) are built but not consumed by
    # anything visible in this function.
    extra_info = {
        'config': config,
        'train_loader': train_loader,
        'valid_loader': valid_loader
    }

    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces('cell', xargs.search_space_name)
    cs = get_configuration_space(xargs.max_nodes, search_space)

    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = '0'
    num_workers = 1

    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    try:
        workers = []
        for i in range(num_workers):
            w = MyWorker(nameserver=ns_host,
                         nameserver_port=ns_port,
                         convert_func=config2structure,
                         nas_bench=nas_bench,
                         run_id=hb_run_id,
                         id=i)
            w.run(background=True)
            workers.append(w)

        bohb = BOHB(configspace=cs,
                    run_id=hb_run_id,
                    eta=3,
                    min_budget=3,
                    max_budget=108,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    num_samples=xargs.num_samples,
                    random_fraction=xargs.random_fraction,
                    bandwidth_factor=xargs.bandwidth_factor,
                    ping_interval=10,
                    min_bandwidth=xargs.min_bandwidth)
        try:
            results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
        finally:
            # Stop optimizer and workers on every exit path.
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()

    logger.log('Best found configuration: {:}'.format(
        id2config[incumbent]['config']))
    best_arch = config2structure(id2config[incumbent]['config'])

    info = nas_bench.query_by_arch(best_arch)
    if info is None:
        logger.log('Did not find this architecture : {:}.'.format(best_arch))
    else:
        logger.log('{:}'.format(info))
    logger.log('-' * 100)

    logger.log('workers : {:}'.format(workers[0].test_time))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(best_arch)
コード例 #9
0
ファイル: abas.py プロジェクト: lr94/abas
def main():
    """Entry point: run either an ABAS worker or the BOHB master process.

    With ``args.worker`` set, a blocking ``AbasWorker`` is started and the
    process exits with status 0 when it finishes. With ``args.master`` set,
    a nameserver and a BOHB optimizer are started, the optimization result is
    pickled into the log directory and a short summary is printed. With
    neither flag set the process exits with status 1.
    """
    args = parse_args()

    # Set log level
    logging.basicConfig(level={
        'critical': logging.CRITICAL,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG
    }[args.loglevel])

    # Name for the current experiment (optimization, not single training).
    # Note: '+' binds tighter than 'or', so the suffix is only appended to
    # the default name, never to an explicitly given args.exp_name.
    exp_name = args.exp_name or get_default_exp_name(args) + (('_' + args.exp_suffix) if args.exp_suffix else '')

    logdir = os.path.join(args.logdir, exp_name)
    shared_dir = os.path.join(logdir, 'master')
    os.makedirs(shared_dir, exist_ok=True)  # Also creates logdir if it does not exist

    host = hpns.nic_name_to_host(args.nic_name)

    # If this is meant to be a worker process, launch it
    if args.worker:
        w = AbasWorker(run_id=exp_name,
                       source=args.source,
                       target=args.target,
                       net=args.net,
                       load_workers=args.load_workers,
                       max_iter=args.max_iter,
                       logdir=args.logdir,
                       ds_root=args.data_root,
                       no_tqdm=args.no_tqdm,
                       gpu=args.gpu,
                       run_n_avg=args.run_n_avg,
                       da_method=args.da,
                       model_criterion=args.criterion,
                       run_model_criterion=args.run_criterion or args.criterion,
                       kill_diverging=args.kill_diverging,

                       host=host,
                       timeout=args.timeout)
        w.load_nameserver_credentials(working_directory=shared_dir)
        w.run(background=False)
        # Nothing to do, exit
        print("Done")
        # SystemExit is raised directly so this does not depend on the
        # site-provided exit() helper.
        raise SystemExit(0)

    # If we are here we expect to be a master
    if not args.master:
        print("Nothing to do (not a master nor a worker process)")
        raise SystemExit(1)

    # Running as master!

    # Log info
    Logger(logdir=logdir, run_name='master', use_tqdm=False, use_tb=False)

    # Init the nameserver (random port)
    ns = hpns.NameServer(run_id=exp_name, host=host, port=0, working_directory=shared_dir)
    ns_host, ns_port = ns.start()
    print("Nameserver on {}:{}".format(ns_host, ns_port))

    try:
        # These hyperparameters are passed through the command line and are not optimized
        hp = {
            'base.lr': args.lr,
            'base.bs': args.bs,
            'base.wd': args.wd,
        }

        # Load previous runs
        previous_res = None
        if args.previous != '':
            if os.path.isdir(args.previous):
                previous_res = hpres.logged_results_to_HBS_result(args.previous)
            else:
                # SECURITY: unpickling executes arbitrary code; only point
                # args.previous at result files you trust.
                with open(args.previous, 'rb') as fp:
                    previous_res = pickle.load(fp)

        # Safe file removal
        remove_file(os.path.join(shared_dir, 'config.json'))
        remove_file(os.path.join(shared_dir, 'results.json'))

        # Launch BOHB
        opt_logger = hpres.json_result_logger(directory=shared_dir, overwrite=False)
        bohb = BOHB(
            configspace=AbasWorker.get_configspace(hp),
            previous_result=previous_res,
            run_id=exp_name,

            min_budget=args.min_budget, max_budget=args.max_budget,
            eta=args.eta,

            host=host,
            nameserver=ns_host, nameserver_port=ns_port,
            ping_interval=15,

            result_logger=opt_logger
        )
        try:
            res = bohb.run(n_iterations=args.num_iterations, min_n_workers=args.num_workers)
        finally:
            # Done: stop the optimizer and its workers even if run() raised
            bohb.shutdown(shutdown_workers=True)
    finally:
        ns.shutdown()

    # Save results
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    all_runs = res.get_all_runs()

    with open(os.path.join(logdir, 'result_{}.pkl'.format(exp_name)), 'wb') as fp:
        pickle.dump(res, fp)

    print(f"Best found configuration: {id2config[incumbent]['config']}")
    print(f"Total number of sampled unique configurations: {len(id2config)}")
    print(f"Total runs {len(all_runs)}")
    if all_runs:
        print("ABAS run took {:.1f} seconds".format(
                all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
    else:
        # Without any recorded run there are no timestamps to subtract.
        print("ABAS run recorded no runs; duration unavailable")
コード例 #10
0
    NS = hpns.NameServer(run_id='example1',
                         host='127.0.0.1',
                         port=0,
                         working_directory=working_dir)
    ns_host, ns_port = NS.start()

    worker = KerasWorker(nameserver=ns_host,
                         nameserver_port=ns_port,
                         run_id=run_id,
                         timeout=120)
    worker.run(background=True)

    bohb = BOHB(configspace=worker.get_configspace(),
                working_directory=working_dir,
                run_id=run_id,
                min_budget=min_num_epochs,
                max_budget=max_num_epochs,
                host=ns_host,
                nameserver=ns_host,
                nameserver_port=ns_port,
                result_logger=result_logger)

    res = bohb.run(n_iterations=n_iterations)

    # store results
    with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()
コード例 #11
0
    num_workers = 1
    min_budget = 30
    max_budget = 90

    # initialise BOHB
    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    workers = []
    for i in range(num_workers):
        w = MyWorker(nameserver=ns_host, nameserver_port=ns_port,
                     run_id=hb_run_id, id=i)
        w.run(background=True)
        workers.append(w)

    bohb = BOHB(configspace=search_space, run_id=hb_run_id,
                eta=3, min_budget=min_budget, max_budget=max_budget,
                nameserver=ns_host, nameserver_port=ns_port,
                ping_interval=10, min_bandwidth=min_bandwidth)

    # run BOHB
    results = bohb.run(int(args.n_iters+args.n_init), min_n_workers=num_workers)
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # process the returned results to give the same format
    bo_results = []
    curr_best = np.inf
    for item in results.data.items():
        key, datum = item
        budget = datum.budget
        hyperparams = datum.config
        object_values = datum.results[budget]['loss']
コード例 #12
0
    workers.append(w)

# Step 3:
# In the last of the 3 steps, we create an optimizer object.
# It samples configurations from the ConfigurationSpace.
# The number of sampled configurations is determined by the
# parameters eta, min_budget and max_budget.
# After evaluating each configuration, starting with the minimum budget
# on the same subset size, only a fraction of 1 / eta of them
# 'advances' to the next round. At the same time the current budget is
# multiplied by eta (3 here, i.e. tripled rather than doubled).
# This process runs until the maximum budget is reached.
HB = BOHB(
    configspace=config_space,
    run_id=run_id,
    eta=3,
    min_budget=9,
    max_budget=243,  # HB parameters
    nameserver=ns_host,
    nameserver_port=ns_port,
    result_logger=result_logger,
    ping_interval=10**6)

# Then start the optimizer. The n_iterations parameter specifies
# the number of iterations to be performed in this run.
# It will wait until at least min_n_workers workers are ready.
HB.run(n_iterations=4, min_n_workers=num_workers)

# After the optimizer run, we shut down the master and then the nameserver.
HB.shutdown(shutdown_workers=True)
NS.shutdown()

# Just to demonstrate, let's read in the logged runs rather than the returned result from HB.run