Example #1
0
    def run_worker(self, pipeline_config, run_id, task_id, ns_credentials_dir,
                   network_interface_name, X_train, Y_train, X_valid, Y_valid,
                   dataset_info, shutdownables):
        """Build a ModuleWorker, load the nameserver credentials and run it.

        The call blocks, polling every 5 seconds, until ``ns_credentials_dir``
        exists. The worker runs in the background when ``task_id <= 1`` and
        in the foreground otherwise.
        """
        # every task id other than -1 sleeps once before polling starts
        if task_id != -1:
            time.sleep(5)

        # poll until the credentials directory has been created
        while True:
            if os.path.isdir(ns_credentials_dir):
                break
            time.sleep(5)

        worker_host = nic_name_to_host(network_interface_name)

        worker_kwargs = dict(
            pipeline=self.sub_pipeline,
            pipeline_config=pipeline_config,
            X_train=X_train,
            Y_train=Y_train,
            X_valid=X_valid,
            Y_valid=Y_valid,
            dataset_info=dataset_info,
            budget_type=self.budget_types[pipeline_config['budget_type']],
            max_budget=pipeline_config["max_budget"],
            host=worker_host,
            run_id=run_id,
            id=task_id,
            shutdownables=shutdownables,
            use_pynisher=pipeline_config["use_pynisher"],
        )
        module_worker = ModuleWorker(**worker_kwargs)
        module_worker.load_nameserver_credentials(ns_credentials_dir)

        # run in background if not on cluster
        in_background = task_id <= 1
        module_worker.run(background=in_background)
Example #2
0
def main(args):
    """Finish populating ``args``, seed all RNGs and start a worker or master.

    Derived attributes written onto ``args``: ``run_id``, ``host``,
    ``bohb_root_path``, ``dataset`` and either ``n_repeat`` (set to ``None``)
    or both ``n_repeat_*_budget`` bounds (set to 1).
    """
    args.run_id = args.job_id or args.experiment_name
    args.host = hpns.nic_name_to_host(args.nic_name)

    bohb_root = Path("experiments", args.experiment_group, args.experiment_name)
    args.bohb_root_path = str(bohb_root)

    args.dataset = args.experiment_name

    # Either the budget dictates n_repeat (both bounds given) or n_repeat is
    # used directly, in which case the bounds collapse to 1.
    bounds_given = (args.n_repeat_lower_budget is not None
                    and args.n_repeat_upper_budget is not None)
    if not bounds_given:
        args.n_repeat_lower_budget = 1
        args.n_repeat_upper_budget = 1
    else:
        args.n_repeat = None

    # Seed every random number generator with the same value.
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    tf.set_random_seed(seed)

    entry_point = run_worker if args.worker else run_master
    entry_point(args)
    def run_worker(self, pipeline_config, constant_hyperparameter, run_id,
                   task_id, ns_credentials_dir, network_interface_name,
                   X_train, Y_train, X_valid, Y_valid):
        """Build a ModuleWorkerNoTimeLimit, load credentials and run it.

        Blocks, polling every 5 seconds, until ``ns_credentials_dir`` exists.
        The worker runs in the background when ``task_id <= 1``.
        """
        poll_seconds = 5

        # every task id other than -1 sleeps once before polling starts
        if task_id != -1:
            time.sleep(poll_seconds)

        # wait for the credentials directory to appear
        credentials_ready = os.path.isdir(ns_credentials_dir)
        while not credentials_ready:
            time.sleep(poll_seconds)
            credentials_ready = os.path.isdir(ns_credentials_dir)

        worker_host = nic_name_to_host(network_interface_name)

        worker_kwargs = dict(
            pipeline=self.sub_pipeline,
            pipeline_config=pipeline_config,
            constant_hyperparameter=constant_hyperparameter,
            X_train=X_train,
            Y_train=Y_train,
            X_valid=X_valid,
            Y_valid=Y_valid,
            budget_type=pipeline_config['budget_type'],
            max_budget=pipeline_config["max_budget"],
            host=worker_host,
            run_id=run_id,
            id=task_id,
            working_directory=pipeline_config["result_logger_dir"],
            permutations=self.permutations,
        )
        module_worker = ModuleWorkerNoTimeLimit(**worker_kwargs)
        module_worker.load_nameserver_credentials(ns_credentials_dir)

        # run in background if not on cluster
        in_background = task_id <= 1
        module_worker.run(background=in_background)
Example #4
0
 def run_worker(self, pipeline_config, run_id, task_id, ns_credentials_dir, network_interface_name,
         X_train, Y_train, X_valid, Y_valid, dataset_info, shutdownables):
     """Create an AutoNetWorker, connect it to the nameserver and run it.

     Blocks, polling every 5 seconds, until ``ns_credentials_dir`` exists.

     Arguments:
         pipeline_config {dict} -- The configuration of the pipeline
         run_id {str} -- An id for the run
         task_id {int} -- An id for the worker
         ns_credentials_dir {str} -- path to nameserver credentials
         network_interface_name {str} -- the name of the network interface
         X_train {array} -- The training data
         Y_train {array} -- The training targets
         X_valid {array} -- The validation data
         Y_valid {array} -- The validation targets
         dataset_info {DatasetInfo} -- Object describing the dataset
         shutdownables {list} -- Objects that need to shut down when the optimization is finished
     """
     # every task id other than -1 sleeps once before polling starts
     if task_id != -1:
         time.sleep(5)

     # wait until the credentials directory has been created
     while True:
         if os.path.isdir(ns_credentials_dir):
             break
         time.sleep(5)

     worker_host = nic_name_to_host(network_interface_name)

     worker_kwargs = dict(
         pipeline=self.sub_pipeline,
         pipeline_config=pipeline_config,
         X_train=X_train,
         Y_train=Y_train,
         X_valid=X_valid,
         Y_valid=Y_valid,
         dataset_info=dataset_info,
         budget_type=self.budget_types[pipeline_config['budget_type']],
         max_budget=pipeline_config["max_budget"],
         host=worker_host,
         run_id=run_id,
         id=task_id,
         shutdownables=shutdownables,
         use_pynisher=pipeline_config["use_pynisher"],
     )
     autonet_worker = AutoNetWorker(**worker_kwargs)
     autonet_worker.load_nameserver_credentials(ns_credentials_dir)

     # run in background if not on cluster
     in_background = task_id <= 1
     autonet_worker.run(background=in_background)
def run_bohb_parallel(id, run_id, bohb_workers):
    """Run one process of a parallel BOHB optimization.

    A process whose ``id`` converts to an integer greater than zero acts as a
    worker: it loads the nameserver credentials from the working directory,
    serves jobs in the foreground and then terminates the interpreter with
    exit status 0. Any other process acts as master: it starts the
    nameserver, one background worker and the optimizer.

    Args:
        id: Process index (anything ``int()`` accepts). The name shadows the
            builtin but is kept for interface compatibility.
        run_id: Identifier shared by all processes of this run.
        bohb_workers: Minimum number of workers the master waits for
            (converted with ``int()``).

    Returns:
        The result object returned by ``BohbWrapper.run`` (master only).
    """
    # Function-scope import: ``sys.exit`` replaces the ``exit`` helper, which
    # the ``site`` module only provides for interactive use.
    import sys

    # get bohb params
    bohb_params = get_bohb_parameters()

    # get suitable interface (eth0 or lo)
    bohb_interface = get_bohb_interface()

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # every process has to lookup the hostname
    host = hpns.nic_name_to_host(bohb_interface)

    os.makedirs(working_dir, exist_ok=True)

    if int(id) > 0:
        print('START NEW WORKER')
        # presumably gives the master time to publish the nameserver
        # credentials -- TODO confirm
        time.sleep(10)
        w = BohbWorker(host=host, run_id=run_id, working_dir=working_dir)
        w.load_nameserver_credentials(working_directory=working_dir)
        w.run(background=False)
        sys.exit(0)

    print('START NEW MASTER')
    ns = hpns.NameServer(run_id=run_id,
                         host=host,
                         port=0,
                         working_directory=working_dir)
    ns_host, ns_port = ns.start()

    # From here on the nameserver is running; make sure it is shut down even
    # if worker/optimizer setup or the optimization itself fails.
    try:
        w = BohbWorker(host=host,
                       nameserver=ns_host,
                       nameserver_port=ns_port,
                       run_id=run_id,
                       working_dir=working_dir)
        w.run(background=True)

        result_logger = hpres.json_result_logger(directory=working_dir,
                                                 overwrite=True)

        bohb = BohbWrapper(configspace=get_configspace(),
                           run_id=run_id,
                           eta=bohb_params['eta'],
                           host=host,
                           nameserver=ns_host,
                           nameserver_port=ns_port,
                           min_budget=bohb_params['min_budget'],
                           max_budget=bohb_params['max_budget'],
                           result_logger=result_logger)
        try:
            res = bohb.run(n_iterations=bohb_params['iterations'],
                           min_n_workers=int(bohb_workers))
        finally:
            # also shuts down the workers registered with this optimizer
            bohb.shutdown(shutdown_workers=True)
    finally:
        ns.shutdown()

    return res
Example #6
0
    def test_Timeout(self):
        """Check worker thread liveness before, right after and 2 s after a HyperBand run.

        Three background workers (timeout=1) are attached to a fresh
        nameserver; the test asserts all are alive before the run, exactly
        one is alive when the run returns, and none after shutdown plus a
        2 second pause.
        """
        def liveness(pool):
            # snapshot of which worker threads are currently running
            return [member.thread.is_alive() for member in pool]

        host = hpn.nic_name_to_host('lo')

        with tempfile.TemporaryDirectory() as working_directory:

            # start up nameserver
            ns = hpn.NameServer(self.run_id,
                                host=host,
                                working_directory=working_directory)
            ns_host, ns_port = ns.start()

            # create workers and connect them to the nameserver
            workers = []
            for worker_id in range(3):
                worker = TestWorker(run_id=self.run_id,
                                    sleep_duration=2,
                                    timeout=1,
                                    host=host,
                                    id=worker_id)
                worker.load_nameserver_credentials(working_directory)
                worker.run(background=True)
                workers.append(worker)

            # at this point all workers must still be alive
            self.assertTrue(all(liveness(workers)))

            optimizer_settings = dict(run_id=self.run_id,
                                      configspace=self.configspace,
                                      nameserver=ns_host,
                                      nameserver_port=ns_port,
                                      min_budget=1,
                                      max_budget=3,
                                      eta=3,
                                      ping_interval=1)
            opt = HyperBand(**optimizer_settings)
            opt.run(1, min_n_workers=3)

            # only one worker should be alive when the run is done
            self.assertEqual(1, sum(liveness(workers)))

            opt.shutdown()
            time.sleep(2)

            # at this point all workers should have finished
            self.assertFalse(any(liveness(workers)))

            # shutdown the nameserver before the temporary directory is gone
            ns.shutdown()
Example #7
0
    def fit(self, pipeline_config, result_loggers, shutdownables, refit=False):
        """Start the ensemble server (if applicable) and register the ensemble logger.

        Args:
            pipeline_config: Mapping of pipeline options. Reads
                ``ensemble_size``, ``task_id``, ``run_worker_on_master_node``
                and ``result_logger_dir``; writes
                ``ensemble_server_credentials`` when a server is started.
            result_loggers: List of result loggers; not modified in place.
            shutdownables: List of objects to shut down later; not modified
                in place.
            refit: When true, nothing is set up.

        Returns:
            An empty dict when ``refit`` is true or ``ensemble_size`` is 0.
            Otherwise a dict with ``"result_loggers"`` (an ensemble logger
            prepended) and ``"shutdownables"`` (the server process appended
            when a server was started).
        """
        if refit or pipeline_config["ensemble_size"] == 0:
            return dict()

        # start server
        if pipeline_config["task_id"] != 1 or pipeline_config["run_worker_on_master_node"]:
            host = nic_name_to_host(OptimizationAlgorithm.get_nic_name(pipeline_config))
            host, port, process = start_server(host)
            pipeline_config["ensemble_server_credentials"] = (host, port)
            # build a new list so the caller's list is left untouched
            shutdownables = shutdownables + [process]

        result_loggers = [ensemble_logger(directory=pipeline_config["result_logger_dir"], overwrite=True)] + result_loggers
        return {"result_loggers": result_loggers, "shutdownables": shutdownables}
Example #8
0
    def test_optimizers(self):
        """Run each optimizer once against its own nameserver and a single worker."""
        for optimizer_cls in (BOHB, H2BO, RandomSearch):
            host = hpn.nic_name_to_host('lo')

            with tempfile.TemporaryDirectory() as working_directory:
                # start up nameserver
                ns = hpn.NameServer(self.run_id,
                                    host=host,
                                    working_directory=working_directory)
                ns_host, ns_port = ns.start()

                # create a worker and connect it to the nameserver
                worker = TestWorker(run_id=self.run_id,
                                    sleep_duration=2,
                                    timeout=1,
                                    host=host,
                                    id=1)
                worker.load_nameserver_credentials(working_directory)
                worker.run(background=True)

                optimizer_settings = dict(run_id=self.run_id,
                                          configspace=self.configspace,
                                          nameserver=ns_host,
                                          nameserver_port=ns_port,
                                          min_budget=1,
                                          max_budget=3,
                                          eta=3,
                                          ping_interval=1)
                opt = optimizer_cls(**optimizer_settings)
                opt.run(1, min_n_workers=1)

                opt.shutdown()
                time.sleep(2)

                # shutdown the nameserver before the temporary directory is gone
                ns.shutdown()
Example #9
0
    def test_Timeout(self):
        """Check that a worker with timeout=1 is alive right after a computation and dead 2 s later."""
        class NoOpCallback(object):
            """Callback stand-in whose ``register_result`` ignores everything."""

            def register_result(self, *args, **kwargs):
                pass

        host = hpn.nic_name_to_host('lo')

        worker = TestWorker(run_id=self.run_id,
                            sleep_duration=0,
                            timeout=1,
                            host=host)

        callback = NoOpCallback()

        with tempfile.TemporaryDirectory() as working_directory:
            # start up nameserver
            ns = hpn.NameServer(self.run_id,
                                host=host,
                                working_directory=working_directory)
            ns_host, ns_port = ns.start()

            # connect worker to it
            worker.load_nameserver_credentials(working_directory)
            worker.run(background=True)

            # start a computation with a dummy callback and dummy id
            worker.start_computation(callback, '0')

            # at this point the worker must still be alive
            still_running = worker.thread.is_alive()
            self.assertTrue(still_running)

            # as the timeout is only 1, after 2 seconds, the worker thread should be dead
            time.sleep(2)
            still_running = worker.thread.is_alive()
            self.assertFalse(still_running)

            # shutdown the nameserver before the temporary directory is gone
            ns.shutdown()
def runBohbParallel(id, run_id):
    """Run one process of a parallel BOHB optimization.

    A process whose ``id`` converts to an integer greater than zero acts as a
    worker: it loads the nameserver credentials from the working directory,
    serves jobs in the foreground and then terminates the interpreter with
    exit status 0. Any other process acts as master: it starts the
    nameserver, one background worker and the optimizer.

    Args:
        id: Process index (anything ``int()`` accepts). The name shadows the
            builtin but is kept for interface compatibility.
        run_id: Identifier shared by all processes of this run. If it
            contains the substring ``'NLP'`` the NLP configuration is used.

    Returns:
        The result object returned by ``BohbWrapper.run`` (master only).
    """
    # Function-scope import: ``sys.exit`` replaces the ``exit`` helper, which
    # the ``site`` module only provides for interactive use.
    import sys

    # get suitable interface (eth0 or lo)
    bohb_interface = get_bohb_interface()

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # select whether to process NLP or speech datasets
    use_nlp = 'NLP' in run_id

    # every process has to lookup the hostname
    host = hpns.nic_name_to_host(bohb_interface)

    os.makedirs(working_dir, exist_ok=True)

    if int(id) > 0:
        print('START NEW WORKER')
        # presumably gives the master time to publish the nameserver
        # credentials -- TODO confirm
        time.sleep(10)
        w = BOHBWorker(host=host,
                       run_id=run_id,
                       working_dir=working_dir,
                       use_nlp=use_nlp)
        w.load_nameserver_credentials(working_directory=working_dir)
        w.run(background=False)
        sys.exit(0)

    print('START NEW MASTER')
    ns = hpns.NameServer(run_id=run_id,
                         host=host,
                         port=0,
                         working_directory=working_dir)
    ns_host, ns_port = ns.start()

    # From here on the nameserver is running; make sure it is shut down even
    # if worker/optimizer setup or the optimization itself fails.
    try:
        w = BOHBWorker(host=host,
                       nameserver=ns_host,
                       nameserver_port=ns_port,
                       run_id=run_id,
                       working_dir=working_dir,
                       use_nlp=use_nlp)
        w.run(background=True)

        result_logger = hpres.json_result_logger(directory=working_dir,
                                                 overwrite=True)

        bohb = BohbWrapper(configspace=get_configspace(use_nlp),
                           run_id=run_id,
                           eta=BOHB_ETA,
                           host=host,
                           nameserver=ns_host,
                           nameserver_port=ns_port,
                           min_budget=BOHB_MIN_BUDGET,
                           max_budget=BOHB_MAX_BUDGET,
                           result_logger=result_logger)
        try:
            res = bohb.run(n_iterations=BOHB_ITERATIONS,
                           min_n_workers=BOHB_WORKERS)
        finally:
            # also shuts down the workers registered with this optimizer
            bohb.shutdown(shutdown_workers=True)
    finally:
        ns.shutdown()

    return res
def dqn_bohb_wrapper(**params):
    """Run one process of a BOHB hyperparameter search for the DQN worker.

    The process with ``params["array_id"] == 1`` acts as master: it creates
    the output directories, starts the nameserver, runs one background
    ``DQNWorker`` and drives BOHB, pickling the result to
    ``<logdir>/bohb_output/results.pkl``. Every other process only runs a
    foreground ``DQNWorker`` that reads the nameserver credentials from
    ``<logdir>/bohb_output``.

    Keys read from ``params``: ``logdir``, ``architecture``, ``array_id``,
    ``instance_id``, ``nic_name``, ``min_resource``, ``max_resource`` and,
    for architectures other than ``'lstm'``/``'gru'``, ``batch_size`` (which
    is removed because it becomes a tuned hyperparameter). ``params`` is
    forwarded to ``DQNWorker`` with ``logdir`` redirected to
    ``<logdir>/dqn_output``.

    Raises:
        KeyError: if a required key (including ``batch_size`` for
            non-recurrent architectures) is missing.
    """
    # Derive the output locations; the worker logs into its own subdirectory.
    logdir = params["logdir"]
    dqn_output_dir = os.path.join(logdir, 'dqn_output')
    params["logdir"] = dqn_output_dir
    bohb_output_dir = os.path.join(logdir, 'bohb_output')

    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output
    logger = logging.getLogger()
    # NOTE: this is the root logger, which has no parent to propagate to.
    logger.propagate = False  # no duplicate logging outputs
    fh = logging.FileHandler(os.path.join(logdir, 'bohb.log'))
    fh.setLevel(logging.INFO)
    fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s:%(name)s: %(message)s'))
    logger.addHandler(fh)

    # Build configuration space and define all hyperparameters
    cs = ConfigurationSpace()
    epsilon = UniformFloatHyperparameter("epsilon", 0.2, 0.9, default_value=0.6)  # initial epsilon
    epsilon_decay = UniformFloatHyperparameter("epsilon_decay", 0.2, 1, default_value=0.995)  # decay rate
    lr = UniformFloatHyperparameter("lr", 0.0005, 0.01, default_value=0.005)
    units_shared_layer1 = UniformIntegerHyperparameter("units_layer1", 8, 100, default_value=24)
    units_shared_layer2 = UniformIntegerHyperparameter("units_layer2", 8, 100, default_value=24)
    units_policy_layer = UniformIntegerHyperparameter("units_layer3", 8, 100, default_value=24)
    activ_fcn = CategoricalHyperparameter("activ_fcn", ['relu6', 'elu', 'mixed'], default_value='relu6')
    gamma = UniformFloatHyperparameter("gamma", 0.6, 0.90, default_value=0.80)
    tau = UniformFloatHyperparameter("tau", 0.5, 1., default_value=0.7)

    # Hyperparameters shared by every architecture, in registration order.
    hyperparameters = [units_shared_layer1, units_shared_layer2, units_policy_layer,
                       epsilon, epsilon_decay, activ_fcn, lr, gamma, tau]
    if params["architecture"] in ('lstm', 'gru'):
        # recurrent architectures tune the trace length
        hyperparameters.append(
            UniformIntegerHyperparameter("trace_length", 1, 20, default_value=8))
    else:
        # batch_size is tuned instead of being passed through as a fixed value
        params.pop("batch_size")
        hyperparameters.append(
            UniformIntegerHyperparameter("batch_size", 1, 100, default_value=30))
    cs.add_hyperparameters(hyperparameters)

    logger.info('##############################################')
    logger.info('Run Optimization')
    logger.info('##############################################')
    if params["array_id"] == 1:
        # Create the directories where live data is logged. ``exist_ok``
        # avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(dqn_output_dir, exist_ok=True)
        os.makedirs(bohb_output_dir, exist_ok=True)

        # start nameserver
        NS = hpns.NameServer(run_id=params["instance_id"], nic_name=params["nic_name"],
                             working_directory=bohb_output_dir)
        ns_host, ns_port = NS.start()  # stores information for workers to find in working directory

        # The nameserver is running from here on; always shut it down again.
        try:
            # BOHB is usually so cheap that we can afford to run a worker on the master node, too.
            worker = DQNWorker(nameserver=ns_host, nameserver_port=ns_port, run_id=params["instance_id"], **params)
            worker.run(background=True)

            logger.info('##############################################')
            logger.info('Setup BOHB instance')
            logger.info('##############################################')

            logger.info('Output_dir: %s' % bohb_output_dir)
            HB = BOHB(configspace=cs,
                      run_id=params["instance_id"],
                      eta=3,
                      min_budget=params["min_resource"],
                      max_budget=params["max_resource"],
                      host=ns_host,
                      nameserver=ns_host,
                      nameserver_port=ns_port,
                      ping_interval=3600)
            try:
                # BOHB can wait until a minimum number of workers is online before starting
                res = HB.run(n_iterations=4,
                             min_n_workers=4)

                # pickle result here for later analysis
                with open(os.path.join(bohb_output_dir, 'results.pkl'), 'wb') as f:
                    pickle.dump(res, f)

                id2config = res.get_id2config_mapping()
                print('A total of %i unique configurations where sampled.' % len(id2config.keys()))
                print('A total of %i runs where executed.' % len(res.get_all_runs()))
            finally:
                # shutdown all workers
                HB.shutdown(shutdown_workers=True)
        finally:
            # shutdown nameserver
            NS.shutdown()

    else:
        host = hpns.nic_name_to_host(params["nic_name"])

        # workers only instantiate the DQNWorker, find the nameserver and start serving
        w = DQNWorker(run_id=params["instance_id"], host=host, **params)
        w.load_nameserver_credentials(bohb_output_dir)
        # run worker in the foreground
        w.run(background=False)
Example #12
0
def run_opt(args):
    """Run a hyperparameter optimization as master with one local worker.

    Starts a nameserver and a background worker, runs the optimizer selected
    by ``args.method`` for ``args.n_iterations`` iterations and pickles the
    result to ``<args.shared_directory>/results.pkl``. The optimizer and the
    nameserver are shut down even when the run fails.

    Args:
        args: Namespace providing ``method`` ("BOHB", "random", "BO" or
            "HB"), ``nic_name``, ``shared_directory``, ``run_id``,
            ``min_budget``, ``max_budget`` and ``n_iterations``.

    Raises:
        ValueError: if ``args.method`` is not a supported method. This is
            checked before anything is started.
    """
    # Validate first: previously an unknown method only surfaced as an
    # unbound-variable error after the nameserver and worker were running.
    known_methods = ("BOHB", "random", "BO", "HB")
    if args.method not in known_methods:
        raise ValueError(
            "Unknown optimization method {!r}; expected one of: {}".format(
                args.method, ", ".join(known_methods)))

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    try:
        # Start local worker
        w = worker(run_id=args.run_id,
                   host=host,
                   nameserver=ns_host,
                   nameserver_port=ns_port,
                   timeout=120)
        w.run(background=True)

        # Pick the optimizer class; all four take the same arguments.
        if args.method == "BOHB":
            print("[RUNNER] method: BOHB")
            optimizer_cls = BOHB
        elif args.method == "random":
            print("[RUNNER] method: random")
            optimizer_cls = RandomSearch
        elif args.method == "BO":
            print("[RUNNER] method: BO")
            optimizer_cls = BO_Search
        else:
            # "HB" -- no banner is printed for this method
            optimizer_cls = HyperBand

        opt = optimizer_cls(
            configspace=worker.get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )

        try:
            res = opt.run(n_iterations=args.n_iterations)

            # store results
            with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
                pickle.dump(res, fh)
        finally:
            # shutdown the optimizer together with its workers
            opt.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()
Example #13
0
# RUN BOHB
# Script fragment: defines the settings of a BOHB run and starts the
# nameserver for it.
# NOTE(review): the outer ``try`` below has no handler in the visible part of
# this snippet -- the remainder appears to be cut off.
import os
working_dir = "./results/BOHB/"
# location of the pickled result; not used in the visible part of the script
result_file = os.path.join(working_dir, 'bohb_result.pkl')
nic_name = 'lo0'
# presumably 0 lets the nameserver pick a free port -- TODO confirm
port = 0
run_id = 'bohb_run_1'
n_bohb_iterations = 10
min_budget = 500
max_budget = 3500

try:
    # Start a nameserver #####
    # get host
    try:
        host = hpns.nic_name_to_host(nic_name)
    except ValueError as e:
        # fall back to localhost when the lookup for nic_name raises ValueError
        host = "localhost"
        print(e)
        print("ValueError getting host from nic_name {}, "
              "setting to localhost.".format(nic_name))

    ns = hpns.NameServer(run_id=run_id,
                         host=host,
                         port=port,
                         working_directory=working_dir)
    # start() returns the host and port the nameserver is reachable at
    ns_host, ns_port = ns.start()
    print(ns_host)
    print()
    print(ns_port)
Example #14
0
def main():
    """Command-line entry point for parallel hyper-parameter tuning.

    With ``--worker`` the process becomes a worker: it loads the nameserver
    credentials from the shared directory, serves jobs in the foreground and
    then terminates the interpreter with exit status 0. Otherwise the process
    becomes the master: it starts a name server, runs HyperBand until
    ``--n-iterations`` iterations are done (waiting for ``--n-workers``
    workers first) and prints a summary of the run. The optimizer and the
    name server are shut down even when the run fails.
    """
    # Function-scope import: ``sys.exit`` replaces the ``exit`` helper, which
    # the ``site`` module only provides for interactive use.
    import sys

    parser = argparse.ArgumentParser(
        parents=[get_train_parser()],
        description='Parallel execution of hyper-tuning',
    )
    parser.add_argument('--run-id',
                        required=True,
                        help='Name of the run')
    parser.add_argument('--min-budget',
                        type=float,
                        help='Minimum budget used during the optimization',
                        default=1)
    parser.add_argument('--max-budget',
                        type=float,
                        help='Maximum budget used during the optimization',
                        default=64)
    parser.add_argument('--n-iterations',
                        type=int,
                        help='Number of iterations performed by the optimizer',
                        default=3)
    parser.add_argument('--n-workers',
                        type=int,
                        help='Number of workers to run in parallel',
                        default=3)
    parser.add_argument('--eta',
                        type=int,
                        help='Parameter of the hyper-tuning algorithm',
                        default=4)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument('--hostname',
                        default=None,
                        help='IP of name server.')
    parser.add_argument('--shared-directory',
                        type=str,
                        help=('A directory that is accessible '
                              'for all processes, e.g. a NFS share'),
                        default='output/hypertune')
    args = parser.parse_args()
    print(args)

    # ``model_type`` is not defined above; presumably it comes from the
    # parent parser returned by get_train_parser() -- TODO confirm
    MyWorker = WORKERS[args.model_type]

    if not args.hostname and socket.gethostname().lower().startswith('lenovo'):
        # If we are on cluster set IP
        args.hostname = hpns.nic_name_to_host('eno1')
    elif not args.hostname:
        args.hostname = '127.0.0.1'

    logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"),
                        format='%(asctime)s %(message)s',
                        datefmt='%I:%M:%S')

    args.callbacks = ['learning-rate-scheduler', 'early-stopping']

    if args.worker:
        # Start a worker in listening mode (waiting for jobs from master)
        w = MyWorker(
             args,
             run_id=args.run_id,
             host=args.hostname,
        )
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        sys.exit(0)

    result_logger = hpres.json_result_logger(
        directory=args.shared_directory,
        overwrite=True,
    )

    # Start a name server
    name_server = hpns.NameServer(
        run_id=args.run_id,
        host=args.hostname,
        port=0,
        working_directory=args.shared_directory,
    )
    ns_host, ns_port = name_server.start()

    # The name server is running from here on; always shut it down again.
    try:
        # Run an optimizer
        bohb = HyperBand(
            configspace=MyWorker.get_configspace(),  # model can be an arg here?
            run_id=args.run_id,
            result_logger=result_logger,
            eta=args.eta,
            host=args.hostname,
            nameserver=ns_host,
            nameserver_port=ns_port,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )
        try:
            res = bohb.run(n_iterations=args.n_iterations,
                           min_n_workers=args.n_workers)
        finally:
            # After the optimizer run, we must shutdown the master ...
            bohb.shutdown(shutdown_workers=True)
    finally:
        # ... and the nameserver.
        name_server.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    inc_runs = res.get_runs_by_id(incumbent)
    # the last entry of the incumbent's runs is the one reported
    inc_run = inc_runs[-1]
    all_runs = res.get_all_runs()

    print("Best loss {:6.2f}".format(inc_run.loss))
    print('A total of %i unique configurations where sampled.' %
          len(id2config.keys()))
    print('A total of %i runs where executed.' % len(all_runs))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum([r.budget for r in all_runs]) / args.max_budget))
    print('The run took  %.1f seconds to complete.' %
          (all_runs[-1].time_stamps['finished'] -
           all_runs[0].time_stamps['started']))
Example #15
0
def get_parameters(selected_x, selected_y, kFold, num_threads):
    """Tune hyperparameters with BOHB locally and return the incumbent.

    Budgets, iteration count, shared directory and network interface are
    read from the command line. A nameserver, one background worker and a
    BOHB optimizer are started under the run id ``'example1'``; all are shut
    down after the run.

    Returns:
        A ``(parameter, min_error)`` tuple: the incumbent's configuration and
        the ``'loss'`` entry of the first run recorded for the incumbent.
    """
    run_name = 'example1'

    arg_parser = argparse.ArgumentParser(
        description='Example 1 - sequential and local execution.')
    arg_parser.add_argument(
        '--min_budget', type=float, default=1,
        help='Minimum budget used during the optimization.')
    arg_parser.add_argument(
        '--max_budget', type=float, default=1,
        help='Maximum budget used during the optimization.')
    arg_parser.add_argument(
        '--n_iterations', type=int, default=100,
        help='Number of iterations performed by the optimizer')
    arg_parser.add_argument(
        '--shared_directory', type=str, default='./result',
        help='A directory that is accessible for all processes, e.g. a NFS share.')
    arg_parser.add_argument('--nic_name', type=str, default='lo')

    args = arg_parser.parse_args()

    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=False)

    # Step 1: nameserver. It is started for this run id on the resolved host
    # with port 0 and keeps track of the workers taking part in the run.
    name_server = hpns.NameServer(run_id=run_name, host=host, port=0)
    ns_host, ns_port = name_server.start()

    # Step 2: worker. It gets the data, the nameserver location and the same
    # run id, then waits in the background for configurations to evaluate.
    local_worker = worker(selected_x,
                          selected_y,
                          kFold,
                          num_threads,
                          host=host,
                          run_id=run_name,
                          nameserver=ns_host,
                          nameserver_port=ns_port)
    local_worker.run(background=True)

    # Step 3: optimizer. ``run`` returns the result object holding all runs.
    optimizer = BOHB(configspace=local_worker.get_configspace(),
                     run_id=run_name,
                     host=host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     result_logger=result_logger,
                     min_budget=args.min_budget,
                     max_budget=args.max_budget)
    result = optimizer.run(n_iterations=args.n_iterations)

    # Step 4: shut down the optimizer (with its workers) and the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    # Step 5: look up the incumbent (best) configuration and its loss.
    id2config = result.get_id2config_mapping()
    incumbent = result.get_incumbent_id()
    incumbent_runs = result.get_runs_by_id(incumbent)

    parameter = id2config[incumbent]['config']
    min_error = incumbent_runs[0]['loss']

    return parameter, min_error
Example #16
0
def run_BOHB(working_dir,
             result_file,
             n_bohb_iter=12,
             min_budget=1,
             max_budget=9,
             genotype="genotypes.KMNIST",
             warmstart=False,
             dataset='KMNIST'):
    """Run BOHB locally with one background worker and pickle the result.

    A nameserver is started on the ``lo`` interface, a ``PyTorchWorker`` is
    launched in the background and BOHB is run for ``n_bohb_iter``
    iterations. The optimizer and the nameserver are shut down even when a
    step fails.

    Args:
        working_dir: Working directory of the nameserver. For a fresh run
            the JSON result logger writes here (overwriting); for a warm
            start the previous results are read from here.
        result_file: Path the returned result object is pickled to.
        n_bohb_iter: Number of BOHB iterations.
        min_budget: Minimum budget handed to BOHB.
        max_budget: Maximum budget handed to BOHB.
        genotype: Value assigned to the worker's ``genotype`` attribute.
        warmstart: When true, results logged in ``working_dir`` are passed
            to BOHB as ``previous_result`` and no result logger is attached.
        dataset: Dataset identifier forwarded to the worker.
    """
    nic_name = 'lo'
    port = 0
    run_id = 'bohb_run_1'

    previous_run = None
    if warmstart:
        previous_run = hpres.logged_results_to_HBS_result(working_dir)

    # Bound before the try block: if start-up fails early, the cleanup below
    # must not trip over unassigned names and mask the original error.
    ns = None
    bohb = None
    try:
        # Start a nameserver
        host = hpns.nic_name_to_host(nic_name)
        ns = hpns.NameServer(run_id=run_id,
                             host=host,
                             port=port,
                             working_directory=working_dir)
        ns_host, ns_port = ns.start()

        # Start local worker
        worker = PyTorchWorker(dataset=dataset,
                               run_id=run_id,
                               host=host,
                               nameserver=ns_host,
                               nameserver_port=ns_port,
                               timeout=300)
        worker.genotype = genotype
        worker.run(background=True)

        # Arguments shared by the warm-start and the fresh-run optimizer.
        optimizer_kwargs = dict(configspace=worker.get_configspace(),
                                run_id=run_id,
                                host=host,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                min_budget=min_budget,
                                max_budget=max_budget)
        if warmstart:
            optimizer_kwargs['previous_result'] = previous_run
        else:
            optimizer_kwargs['result_logger'] = hpres.json_result_logger(
                directory=working_dir, overwrite=True)
        bohb = BOHB(**optimizer_kwargs)

        # Run the optimizer
        result = bohb.run(n_iterations=n_bohb_iter)
        logging.info("Write result to file {}".format(result_file))
        with open(result_file, 'wb') as f:
            pickle.dump(result, f)
    finally:
        # Shut down only what was actually created; the nameserver is
        # stopped even if stopping the optimizer raises.
        try:
            if bohb is not None:
                bohb.shutdown(shutdown_workers=True)
        finally:
            if ns is not None:
                ns.shutdown()
Example #17
0
def a2c_bohb_wrapper(**params):
    """Optimize A2C hyperparameters with BOHB, acting as master or worker.

    The process whose ``params["array_id"]`` equals 1 is the master: it
    starts the nameserver, runs one ``A2CWorker`` in the background, runs
    BOHB for four iterations (waiting for at least four workers), pickles
    the result to ``<logdir>/bohb_output/results.pkl`` and prints summary
    counts. Every other process runs a single ``A2CWorker`` in the
    foreground using the nameserver credentials found in
    ``<logdir>/bohb_output``.

    Args:
        **params: Run settings. The keys read here are ``logdir``,
            ``array_id``, ``instance_id``, ``nic_name``, ``min_resource``
            and ``max_resource``. The whole mapping, with ``logdir``
            redirected to ``<logdir>/a2c_output``, is also forwarded to
            ``A2CWorker``.
    """
    # Setup directories where live data is logged
    logdir = params["logdir"]
    a2c_output_dir = os.path.join(logdir, 'a2c_output')
    params["logdir"] = a2c_output_dir
    bohb_output_dir = os.path.join(logdir, 'bohb_output')

    # FileHandler opens its file immediately, so the log directory has to
    # exist before the handler is created.
    os.makedirs(logdir, exist_ok=True)

    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output
    logger = logging.getLogger()
    logger.propagate = False  # no duplicate logging outputs
    fh = logging.FileHandler(os.path.join(logdir, 'bohb.log'))
    fh.setLevel(logging.INFO)
    fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s:%(name)s: %(message)s'))
    logger.addHandler(fh)

    # Build configuration space and define all hyperparameters
    cs = ConfigurationSpace()
    lr = UniformFloatHyperparameter("lr", 1e-4, 1e-2, default_value=1e-3)
    units_shared_layer1 = UniformIntegerHyperparameter("units_shared_layer1", 8, 100, default_value=24)
    units_shared_layer2 = UniformIntegerHyperparameter("units_shared_layer2", 8, 100, default_value=24)
    units_policy_layer = UniformIntegerHyperparameter("units_policy_layer", 8, 100, default_value=24)
    vf_coeff = UniformFloatHyperparameter("vf_coeff", 1e-2, 0.5, default_value=0.1)
    ent_coeff = UniformFloatHyperparameter("ent_coeff", 5e-6, 1e-4, default_value=1e-5)
    gamma = UniformFloatHyperparameter("gamma", 0.6, 1., default_value=0.90)
    activ_fcn = CategoricalHyperparameter("activ_fcn", ['relu6', 'elu', 'mixed'], default_value='relu6')
    cs.add_hyperparameters([units_shared_layer1, units_shared_layer2, units_policy_layer,
                            vf_coeff, ent_coeff, gamma, lr, activ_fcn])  # batch_size

    logger.info('##############################################')
    logger.info('Run Optimization')
    logger.info('##############################################')

    if params["array_id"] != 1:
        host = hpns.nic_name_to_host(params["nic_name"])

        # Plain workers only instantiate the worker, find the nameserver
        # and start serving in the foreground.
        w = A2CWorker(run_id=params["instance_id"], host=host, **params)
        w.load_nameserver_credentials(bohb_output_dir)
        w.run(background=False)
        return

    # Master: make sure the output directories exist (race-free).
    os.makedirs(a2c_output_dir, exist_ok=True)
    os.makedirs(bohb_output_dir, exist_ok=True)

    # start nameserver
    NS = hpns.NameServer(run_id=params["instance_id"], nic_name=params["nic_name"],
                         working_directory=bohb_output_dir)
    ns_host, ns_port = NS.start()  # stores information for workers to find in working directory

    HB = None
    try:
        # BOHB is usually so cheap that we can afford to run a worker on
        # the master node, too.
        worker = A2CWorker(nameserver=ns_host, nameserver_port=ns_port, run_id=params["instance_id"], **params)
        worker.run(background=True)

        logger.info('##############################################')
        logger.info('Setup BOHB instance')
        logger.info('##############################################')

        logger.info('Output_dir: %s' % bohb_output_dir)
        HB = BOHB(configspace=cs,
                  run_id=params["instance_id"],
                  eta=3,
                  min_budget=params["min_resource"],
                  max_budget=params["max_resource"],
                  host=ns_host,
                  nameserver=ns_host,
                  nameserver_port=ns_port,
                  ping_interval=3600)

        # BOHB can wait until a minimum number of workers is online before starting
        res = HB.run(n_iterations=4, min_n_workers=4)

        # pickle result here for later analysis
        with open(os.path.join(bohb_output_dir, 'results.pkl'), 'wb') as f:
            pickle.dump(res, f)

        id2config = res.get_id2config_mapping()
        print('A total of %i unique configurations where sampled.' % len(id2config.keys()))
        print('A total of %i runs where executed.' % len(res.get_all_runs()))
    finally:
        # Always release the workers and the nameserver, also when the run
        # above failed; the nameserver is stopped even if stopping the
        # optimizer raises.
        try:
            if HB is not None:
                HB.shutdown(shutdown_workers=True)
        finally:
            NS.shutdown()
Example #18
0
def _share_percent(part, whole):
    """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
    if not whole:
        return 0.0
    return part / whole * 100


def _print_class_breakdown(split_label, n_melanoma, n_nevus, n_seborrheic,
                           n_total):
    """Print the share and count of each lesion class for one data split."""
    print(
        "For the {split} images, {mel:=.1f}% ({mel2}) are of melanoma, {nev:=.1f}% ({nev2}) are of nevus and {seb:=.1f}% ({seb2}) are for seborrheic keratosis."
        .format(split=split_label,
                mel=_share_percent(n_melanoma, n_total),
                mel2=n_melanoma,
                nev=_share_percent(n_nevus, n_total),
                nev2=n_nevus,
                seb=_share_percent(n_seborrheic, n_total),
                seb2=n_seborrheic))


def main():
    """Report dataset statistics, then run BOHB with one local worker.

    Image counts are taken from ``data/<split>/<class>/*`` for the splits
    ``train``, ``valid`` and ``test``. Afterwards a nameserver, one
    background worker and a BOHB optimizer are started; live results are
    logged as JSON into the shared directory. The optimizer and the
    nameserver are shut down even if the run fails.
    """
    splits = (("train", "training"), ("valid", "validation"), ("test", "test"))
    classes = ("melanoma", "nevus", "seborrheic_keratosis")

    # Check quantities of train, validation and test images, overall and
    # per class.
    totals = {}
    per_class = {}
    for split, _ in splits:
        totals[split] = len(glob("data/{}/*/*".format(split)))
        per_class[split] = [
            len(glob("data/{}/{}/*".format(split, cls))) for cls in classes
        ]

    print(
        "There are {} training images, {} validation images and {} test images."
        .format(totals["train"], totals["valid"], totals["test"]))
    # An empty split is reported as 0.0% instead of dividing by zero.
    for split, label in splits:
        n_mel, n_nev, n_seb = per_class[split]
        _print_class_breakdown(label, n_mel, n_nev, n_seb, totals[split])

    # Set HpBandSter logging
    logging.basicConfig(level=logging.DEBUG)

    # Define the parser. Note that key parameters are the min_budget,
    # max_budget, shared_directory and n_iterations.
    parser = argparse.ArgumentParser(
        description='ISIC2017 - CNN on Derm Dataset')
    parser.add_argument('--min_budget',
                        type=float,
                        help='Minimum number of epochs for training.',
                        default=1)
    parser.add_argument('--max_budget',
                        type=float,
                        help='Maximum number of epochs for training.',
                        default=3)
    parser.add_argument('--n_iterations',
                        type=int,
                        help='Number of iterations performed by the optimizer',
                        default=16)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument(
        '--run_id',
        type=str,
        help=
        'A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.'
    )
    parser.add_argument(
        '--nic_name',
        type=str,
        help='Which network interface to use for communication.',
        default='lo')
    parser.add_argument(
        '--shared_directory',
        type=str,
        help=
        'A directory that is accessible for all processes, e.g. a NFS share.',
        default='/home/ubuntu/src/derm-ai/data')
    parser.add_argument(
        '--backend',
        help=
        'Toggles which worker is used. Choose between a pytorch and a keras implementation.',
        choices=['pytorch', 'keras'],
        default='pytorch')
    # An empty argument list is parsed, so only the defaults above apply and
    # the real command line is ignored.
    # NOTE(review): presumably intended for notebook use - confirm.
    args = parser.parse_args([])

    host = hpns.nic_name_to_host(args.nic_name)
    # Log live results. This is most useful for really long runs, where
    # intermediate results could already be interesting. The core.result
    # submodule contains the functionality to read the two generated files
    # (results.json and configs.json) and create a Result object.
    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)
    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    bohb = None
    try:
        # Start local worker
        w = worker(run_id=args.run_id,
                   host=host,
                   nameserver=ns_host,
                   nameserver_port=ns_port,
                   timeout=120)
        w.run(background=True)

        bohb = BOHB(
            configspace=w.get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )

        # Run the optimizer; the outcome is persisted by result_logger.
        bohb.run(n_iterations=args.n_iterations)
    finally:
        # Stop the nameserver even if stopping the optimizer raises.
        try:
            if bohb is not None:
                bohb.shutdown(shutdown_workers=True)
        finally:
            NS.shutdown()
Example #19
0
def main():
    """Tune a Tensorforce environment/agent setup with BOHB on localhost.

    Parses the command line, creates the environment, starts a nameserver
    and one background ``TensorforceWorker``, runs BOHB for
    ``--num-iterations`` iterations, then pickles the result to
    ``<directory>/results.pkl`` and prints a summary.

    The optimizer, the nameserver and the environment are released in that
    order, also when a step in between raises.
    """
    # Function-scope import: keeps the cleanup helper local to this script.
    from contextlib import ExitStack

    parser = argparse.ArgumentParser(description='Tensorforce hyperparameter tuner')
    parser.add_argument(
        'environment', help='Environment (name, configuration JSON file, or library module)'
    )
    parser.add_argument(
        '-l', '--level', type=str, default=None,
        help='Level or game id, like `CartPole-v1`, if supported'
    )
    parser.add_argument(
        '-m', '--max-repeats', type=int, default=1, help='Maximum number of repetitions'
    )
    parser.add_argument(
        '-n', '--num-iterations', type=int, default=1, help='Number of BOHB iterations'
    )
    parser.add_argument(
        '-d', '--directory', type=str, default='tuner', help='Output directory'
    )
    parser.add_argument(
        '-r', '--restore', type=str, default=None, help='Restore from given directory'
    )
    parser.add_argument('--id', type=str, default='worker', help='Unique worker id')
    args = parser.parse_args()

    # Callbacks registered on the stack run in reverse order on exit, on
    # success and on failure alike.
    with ExitStack() as cleanup:
        if args.level is None:
            environment = Environment.create(environment=args.environment)
        else:
            environment = Environment.create(environment=args.environment, level=args.level)
        cleanup.callback(environment.close)

        # Everything runs on this machine; the nameserver is given no
        # explicit port.
        host = 'localhost'
        port = None

        server = NameServer(run_id=args.id, working_directory=args.directory, host=host, port=port)
        nameserver, nameserver_port = server.start()
        cleanup.callback(server.shutdown)

        # If multiple workers are started in the same process, each one
        # MUST get a unique id via the worker's `id` argument; only one
        # worker is started here.
        worker = TensorforceWorker(
            environment=environment, run_id=args.id, nameserver=nameserver,
            nameserver_port=nameserver_port, host=host
        )
        worker.run(background=True)

        if args.restore is None:
            previous_result = None
        else:
            previous_result = logged_results_to_HBS_result(directory=args.restore)

        # NOTE(review): overwrite=True replaces logs already present in
        # args.directory; previous results are read above, before the
        # logger is created - confirm this is the intended behaviour when
        # --restore points at the same directory.
        result_logger = json_result_logger(directory=args.directory, overwrite=True)

        optimizer = BOHB(
            configspace=worker.get_configspace(), min_budget=0.5, max_budget=float(args.max_repeats),
            run_id=args.id, working_directory=args.directory,
            nameserver=nameserver, nameserver_port=nameserver_port, host=host,
            result_logger=result_logger, previous_result=previous_result
        )
        cleanup.callback(optimizer.shutdown, shutdown_workers=True)

        results = optimizer.run(n_iterations=args.num_iterations)

    with open(os.path.join(args.directory, 'results.pkl'), 'wb') as filehandle:
        pickle.dump(results, filehandle)

    incumbent_id = results.get_incumbent_id()
    print('Best found configuration:', results.get_id2config_mapping()[incumbent_id]['config'])
    print('Runs:', results.get_runs_by_id(config_id=incumbent_id))
    print('A total of {} unique configurations where sampled.'.format(len(results.get_id2config_mapping())))
    print('A total of {} runs where executed.'.format(len(results.get_all_runs())))
    print('Total budget corresponds to {:.1f} full function evaluations.'.format(
        sum(r.budget for r in results.get_all_runs()) / args.max_repeats)
    )
def run_experiment(args,
                   worker,
                   dest_dir,
                   smac_deterministic,
                   store_all_runs=False):
    """Run one optimization experiment with the method in ``args.opt_method``.

    For ``'randomsearch'``, ``'bohb'`` and ``'hyperband'`` a nameserver is
    started and either a foreground worker is served (``args.worker``; the
    process then exits) or a background worker plus the optimizer are run.
    For ``'smac'`` the given ``worker`` runs SMAC itself.

    Args:
        args: Parsed experiment settings (namespace-like object).
        worker: Worker used by the ``'smac'`` path. The hpbandster path and
            the ``paramnet_surrogates`` experiment replace it with a worker
            obtained from ``get_worker``.
        dest_dir: Directory for the live result log and the written
            configuration space; created if missing.
        smac_deterministic: Forwarded to ``worker.run_smac`` as
            ``deterministic``.
        store_all_runs: Accepted for interface compatibility; not used here.

    Returns:
        The result object produced by the selected optimizer.

    Raises:
        ValueError: If ``args.opt_method`` yields no result (unknown method).
    """
    print("Running experiment (args: %s)" % str(args))
    # make sure the working and dest directory exist
    os.makedirs(args.working_directory, exist_ok=True)
    os.makedirs(dest_dir, exist_ok=True)

    # Stays None when no branch below handles args.opt_method, so the final
    # check can raise ValueError rather than hit an unbound local.
    result = None

    if args.opt_method in ['randomsearch', 'bohb', 'hyperband']:
        print("Using hpbandster-optimizer (%s)" % args.opt_method)
        # Every process has to lookup the hostname
        host = hpns.nic_name_to_host(args.nic_name)
        print("Host: %s" % str(host))

        # setup a nameserver
        # NOTE(review): this also runs for pure worker processes (see the
        # args.worker branch below) - confirm that workers are meant to
        # start their own nameserver.
        NS = hpns.NameServer(run_id=args.run_id,
                             nic_name=args.nic_name,
                             port=0,
                             host=host,
                             working_directory=args.working_directory)
        ns_host, ns_port = NS.start()
        print("Initialized nameserver (ns_host: %s; ns_port: %s)" %
              (str(ns_host), str(ns_port)))

        if args.worker:
            print("This is a pure worker-thread.")
            worker = get_worker(args, host=host)
            worker.load_nameserver_credentials(
                working_directory=args.working_directory)
            worker.run(background=False)
            print("Exiting...")
            exit(0)

        print(
            "This is the name-server thread, however there will be a worker running in the background."
        )

        opt = None
        try:
            worker = get_worker(args, host=host)

            # start worker in the background
            worker.load_nameserver_credentials(
                working_directory=args.working_directory)
            worker.run(background=True)

            if args.exp_name == 'paramnet_surrogates':
                print(
                    "This is the paramnet_surrogates experiment, so any custom budgets will be replaced by the "
                    "dataset-specific budgets.")
                args.min_budget, args.max_budget = worker.budgets[
                    args.dataset_paramnet_surrogates]

            print(
                "Background-worker is running, grabbing configspace from worker and initializing result_logger "
                "(with dest_dir %s)" % dest_dir)
            configspace = worker.configspace

            result_logger = hpres.json_result_logger(directory=dest_dir,
                                                     overwrite=True)

            print("Getting optimizer.")

            opt = get_optimizer(
                args,
                configspace,
                working_directory=args.working_directory,
                run_id=args.run_id,
                min_budget=args.min_budget,
                max_budget=args.max_budget,
                host=host,
                nameserver=ns_host,
                nameserver_port=ns_port,
                ping_interval=30,
                result_logger=result_logger,
            )

            print("Initialization successful, starting optimization.")

            from ConfigSpace.read_and_write import pcs_new
            with open(os.path.join(dest_dir, 'configspace.pcs'), 'w') as fh:
                fh.write(pcs_new.write(opt.config_generator.configspace))

            result = opt.run(n_iterations=args.num_iterations,
                             min_n_workers=args.n_workers)

            print("Finished optimization")
        finally:
            # shutdown the worker and the dispatcher, then the nameserver -
            # also when the optimization failed.
            try:
                if opt is not None:
                    opt.shutdown(shutdown_workers=True)
            finally:
                NS.shutdown()

    if args.exp_name == 'paramnet_surrogates':
        # This if block is necessary to set budgets for paramnet_surrogates - for nothing else
        args_tmp = copy.deepcopy(args)
        args_tmp.opt_method = 'bohb'
        worker = get_worker(args_tmp)
        args.min_budget, args.max_budget = worker.budgets[
            args.dataset_paramnet_surrogates]

    # the number of iterations for the blackbox optimizers must be increased so they have comparable total budgets
    bb_iterations = int(args.num_iterations *
                        (1 +
                         (np.log(args.max_budget) - np.log(args.min_budget)) /
                         np.log(args.eta)))

    if args.opt_method == 'smac':
        # NOTE(review): the original read args.dest_dir although dest_dir is
        # a parameter of this function; args.dest_dir still wins when it
        # exists, otherwise the parameter is used.
        result = worker.run_smac(bb_iterations,
                                 deterministic=smac_deterministic,
                                 working_directory=getattr(
                                     args, 'dest_dir', dest_dir))

    if result is None:
        raise ValueError("Unknown method %s!" % args.opt_method)

    return result
Example #21
0
                    help='The ensemble size to consider.')
parser.add_argument('--scheme', type=str, default='nes_re', help='scheme name')
parser.add_argument('--severity_list',
                    type=str,
                    default='0 5',
                    help='Severity levels to sample from during evolution')
parser.add_argument('--esa',
                    type=str,
                    default='beam_search',
                    help='Ensemble selection algorithm')

args = parser.parse_args()
np.random.seed(args.seed)
torch.manual_seed(args.seed)

host = nic_name_to_host(args.nic_name)

if args.array_id == 1:
    os.makedirs(args.working_directory, exist_ok=True)
    with open(os.path.join(args.working_directory, 'settings.txt'), 'w') as f:
        f.write(str(args))

    NS = NameServer(run_id=args.run_id,
                    host=host,
                    working_directory=args.working_directory)
    ns_host, ns_port = NS.start()

    # Regularized Evolution is usually so cheap, that we can afford to run a
    # worker on the master node as a background process
    worker = Worker(nameserver=ns_host,
                    nameserver_port=ns_port,
Example #22
0
def main(args):
    """Run BOHB with ``args.num_workers`` local background workers.

    The working directory is derived from the result folder, dataset (plus
    split/validation suffix), method, network, width, batch size and run
    id. Live results are logged there as JSON, the final result is pickled
    to ``results.pkl`` in the same directory and a summary is printed.

    Args:
        args: Parsed settings. Read here: ``dataset``, ``n_split``,
            ``valprop``, ``result_folder``, ``method``, ``network``,
            ``width``, ``batch_size``, ``run_id``, ``data_folder``,
            ``nic_name``, ``num_workers``, ``early_stop``, ``n_iterations``,
            ``previous_result_folder``, ``min_budget`` and ``max_budget``.

    Raises:
        ValueError: If ``args.num_workers`` is below 1 or the dataset has
            no worker implementation.
        Exception: If the dataset is ``'flights'`` and ``args.n_split`` is
            neither 0 nor 1.
    """
    # The configuration space is taken from a worker, so at least one
    # worker must exist; fail before anything is started.
    if args.num_workers < 1:
        raise ValueError('At least one worker is required, got num_workers=%s'
                         % args.num_workers)

    uci_datasets = (
        'boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm',
        'naval', 'protein', 'boston_gap', 'concrete_gap', 'energy_gap',
        'power_gap', 'wine_gap', 'yacht_gap', 'kin8nm_gap', 'naval_gap',
        'protein_gap',
    )

    extra_string = ''
    if args.dataset == 'flights':
        if args.n_split == 0:
            extra_string += '_2M'
        elif args.n_split == 1:
            extra_string += '_800k'
        else:
            raise Exception(
                'Only Valid values for flight splits are 0 (2M) or 1 (800k)')
        extra_string += '_valprop_' + str(args.valprop)
    elif args.dataset in uci_datasets:
        extra_string += '_split_' + str(args.n_split)
        extra_string += '_valprop_' + str(args.valprop)

    # The network name is only part of the path for non-ResNet networks.
    method_dir = args.method
    if args.network != "ResNet":
        method_dir += '-' + args.network
    working_dir = '/'.join([
        args.result_folder,
        args.dataset + extra_string,
        method_dir,
        str(args.width),
        str(args.batch_size),
        args.run_id,
    ])
    print("WORKING DIR")
    print(working_dir)

    # Create data dir if necessary
    if not os.path.exists(args.data_folder):
        mkdir(args.data_folder)

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=working_dir,
                                             overwrite=False)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=working_dir)
    ns_host, ns_port = NS.start()

    bohb = None
    try:
        workers = []
        for i in range(args.num_workers):
            print("CREATING WORKER:", i)
            # Keyword arguments every worker type receives.
            common = dict(early_stop=args.early_stop,
                          run_id=args.run_id,
                          host=host,
                          nameserver=ns_host,
                          nameserver_port=ns_port,
                          timeout=600,
                          id=i)
            if args.dataset == 'spirals':
                worker_class = create_SpiralsWorker(args.method, args.network,
                                                    args.width,
                                                    args.batch_size)
                worker = worker_class(**common)
            elif args.dataset == 'flights':
                worker_class = create_FlightWorker(args.method, args.network,
                                                   args.width,
                                                   args.batch_size)
                worker = worker_class(base_dir=args.data_folder,
                                      prop_val=args.valprop,
                                      k800=(args.n_split == 1),
                                      **common)
            elif args.dataset in uci_datasets:
                worker_class = create_UCIWorker(args.method, args.network,
                                                args.width, args.batch_size)
                worker = worker_class(dname=args.dataset,
                                      base_dir=args.data_folder,
                                      prop_val=args.valprop,
                                      n_split=args.n_split,
                                      **common)
            else:
                raise ValueError('Dataset not implemented yet!')

            worker.run(background=True)
            workers.append(worker)

        n_iterations = args.n_iterations
        previous_run = None
        if args.previous_result_folder is not None:
            # Best effort: a previous run that cannot be read is reported
            # and the optimization starts without it.
            try:
                previous_run = hpres.logged_results_to_HBS_result(
                    args.previous_result_folder)
            except Exception as e:
                print(e)

        # Run an optimizer; the configuration space comes from the most
        # recently created worker.
        bohb = BOHB(
            configspace=workers[-1].get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
            previous_result=previous_run,
        )

        res = bohb.run(n_iterations=n_iterations,
                       min_n_workers=args.num_workers)

        # store results
        with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
            pickle.dump(res, fh)
    finally:
        # shutdown - also on failure; the nameserver is stopped even if
        # stopping the optimizer raises.
        try:
            if bohb is not None:
                bohb.shutdown(shutdown_workers=True)
        finally:
            NS.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    all_runs = res.get_all_runs()

    print('Best found configuration:', id2config[incumbent]['config'])
    print('A total of %i unique configurations where sampled.' %
          len(id2config.keys()))
    print('A total of %i runs where executed.' % len(all_runs))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum(r.budget for r in all_runs) / args.max_budget))
    print('The run took  %.1f seconds to complete.' %
          (all_runs[-1].time_stamps['finished'] -
           all_runs[0].time_stamps['started']))
Example #23
0
File: abas.py Project: lr94/abas
def main():
    """Run ABAS hyperparameter optimization as a BOHB worker or master.

    With ``args.worker`` set, a single ``AbasWorker`` serves jobs in the
    foreground and the process exits with status 0. Otherwise
    ``args.master`` must be set (else the process exits with status 1): a
    nameserver and BOHB are started, the result is pickled to
    ``<logdir>/result_<exp_name>.pkl`` and a summary is printed.
    """
    args = parse_args()

    # Set log level
    logging.basicConfig(level={
        'critical': logging.CRITICAL,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG
    }[args.loglevel])

    # Name for the current experiment (optimization, not single training).
    # The optional suffix is only appended to the generated default name,
    # never to an explicitly given one.
    exp_name = args.exp_name
    if not exp_name:
        exp_name = get_default_exp_name(args)
        if args.exp_suffix:
            exp_name += '_' + args.exp_suffix

    logdir = os.path.join(args.logdir, exp_name)
    shared_dir = os.path.join(logdir, 'master')
    os.makedirs(shared_dir, exist_ok=True)  # Also creates logdir if it does not exist

    host = hpns.nic_name_to_host(args.nic_name)

    # If this is meant to be a worker process, launch it
    if args.worker:
        w = AbasWorker(run_id=exp_name,
                       source=args.source,
                       target=args.target,
                       net=args.net,
                       load_workers=args.load_workers,
                       max_iter=args.max_iter,
                       logdir=args.logdir,
                       ds_root=args.data_root,
                       no_tqdm=args.no_tqdm,
                       gpu=args.gpu,
                       run_n_avg=args.run_n_avg,
                       da_method=args.da,
                       model_criterion=args.criterion,
                       run_model_criterion=args.run_criterion or args.criterion,
                       kill_diverging=args.kill_diverging,

                       host=host,
                       timeout=args.timeout)
        w.load_nameserver_credentials(working_directory=shared_dir)
        w.run(background=False)
        # Nothing to do, exit
        print("Done")
        exit(0)

    # If we are here we expect to be a master
    if not args.master:
        print("Nothing to do (not a master nor a worker process)")
        exit(1)

    # Running as master!

    # Log info
    # NOTE(review): the Logger instance is created for its side effects
    # only and not kept - confirm that is sufficient.
    Logger(logdir=logdir, run_name='master', use_tqdm=False, use_tb=False)

    # Init the nameserver (random port)
    ns = hpns.NameServer(run_id=exp_name, host=host, port=0, working_directory=shared_dir)
    ns_host, ns_port = ns.start()
    print("Nameserver on {}:{}".format(ns_host, ns_port))

    bohb = None
    try:
        # These hyperparameters are passed through the command line and are not optimized
        hp = {
            'base.lr': args.lr,
            'base.bs': args.bs,
            'base.wd': args.wd,
        }

        # Load previous runs (skipped when no path is given)
        previous_res = None
        if args.previous:
            if os.path.isdir(args.previous):
                previous_res = hpres.logged_results_to_HBS_result(args.previous)
            else:
                # SECURITY: unpickling executes arbitrary code; only point
                # this at result files you created yourself.
                with open(args.previous, 'rb') as fp:
                    previous_res = pickle.load(fp)

        # Remove stale live logs. The result logger writes configs.json and
        # results.json and is opened with overwrite=False below, so both
        # files must be gone before it is created.
        for stale_log in ('configs.json', 'results.json'):
            remove_file(os.path.join(shared_dir, stale_log))

        # Launch BOHB
        opt_logger = hpres.json_result_logger(directory=shared_dir, overwrite=False)
        bohb = BOHB(
            configspace=AbasWorker.get_configspace(hp),
            previous_result=previous_res,
            run_id=exp_name,

            min_budget=args.min_budget, max_budget=args.max_budget,
            eta=args.eta,

            host=host,
            nameserver=ns_host, nameserver_port=ns_port,
            ping_interval=15,

            result_logger=opt_logger
        )

        res = bohb.run(n_iterations=args.num_iterations, min_n_workers=args.num_workers)
    finally:
        # Done (or failed): stop the workers and the nameserver either way;
        # the nameserver is stopped even if stopping the optimizer raises.
        try:
            if bohb is not None:
                bohb.shutdown(shutdown_workers=True)
        finally:
            ns.shutdown()

    # Save results
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    all_runs = res.get_all_runs()

    with open(os.path.join(logdir, 'result_{}.pkl'.format(exp_name)), 'wb') as fp:
        pickle.dump(res, fp)

    print(f"Best found configuration: {id2config[incumbent]['config']}")
    print(f"Total number of sampled unique configurations: {len(id2config.keys())}")
    print(f"Total runs {len(all_runs)}")
    print("ABAS run took {:.1f} seconds".format(
            all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
Example #24
0
def get_parameters(data, target_feature_index):
    """Run a local BOHB search and return the best configuration found.

    Options (budgets, iteration count, run id, network interface, shared
    directory) are read from the process command line via ``argparse``.

    With ``--worker`` the process only acts as a worker: it loads the
    nameserver credentials from the shared directory, serves in the
    foreground and then terminates the interpreter with ``exit(0)``.
    Otherwise it starts a nameserver, one background worker and the BOHB
    optimizer, pickles the ``Result`` object to ``results.pkl`` in the shared
    directory and returns a summary of the incumbent.

    Args:
        data: forwarded unchanged to ``worker``.
        target_feature_index: forwarded unchanged to ``worker``.

    Returns:
        A tuple ``(parameter, min_error, feature_importance_dict)``: the
        incumbent's configuration, and the ``loss`` and ``info`` of the
        incumbent's run on its largest budget.
    """
    parser = argparse.ArgumentParser(
        description='Example 1 - sequential and local execution.')
    parser.add_argument('--min_budget',
                        type=float,
                        help='Minimum budget used during the optimization.',
                        default=9)
    parser.add_argument('--max_budget',
                        type=float,
                        help='Maximum budget used during the optimization.',
                        default=243)
    parser.add_argument('--n_iterations',
                        type=int,
                        help='Number of iterations performed by the optimizer',
                        default=4)
    parser.add_argument('--n_workers',
                        type=int,
                        help='Number of workers to run in parallel.',
                        default=2)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument(
        '--run_id',
        type=str,
        help=
        'A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.'
    )
    parser.add_argument(
        '--nic_name',
        type=str,
        help='Which network interface to use for communication.',
        default='lo')
    parser.add_argument(
        '--shared_directory',
        type=str,
        help=
        'A directory that is accessible for all processes, e.g. a NFS share.',
        default='/home/lchen/parameters/result')

    args = parser.parse_args()

    # The nameserver, every worker and the optimizer must all use the same
    # run id. Previously the nameserver and the local worker were hard-coded
    # to 'test1' while the optimizer used --run_id, so the two sides could
    # disagree. 'test1' is kept as the fallback when --run_id is omitted.
    run_id = args.run_id if args.run_id is not None else 'test1'

    host = hpns.nic_name_to_host(args.nic_name)

    if args.worker:
        # Short artificial delay to make sure the nameserver is already running.
        time.sleep(5)
        w = worker(0.5,
                   data,
                   target_feature_index,
                   run_id=run_id,
                   host=host)
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)

    # Step 1: Start a nameserver on the local machine; port=0 lets it pick a
    # free port. Its credentials are written to the shared directory.
    NS = hpns.NameServer(run_id=run_id,
                         host=host,
                         port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()
    try:
        # Step 2: Start a worker in the background; it waits for incoming
        # configurations to evaluate.
        w = worker(0.5,
                   data,
                   target_feature_index,
                   run_id=run_id,
                   host=host,
                   nameserver=ns_host,
                   nameserver_port=ns_port)
        w.run(background=True)

        # Step 3: Run the optimizer. `run` returns the `Result` object that
        # contains all runs performed.
        bohb = BOHB(configspace=worker.get_configspace(),
                    run_id=run_id,
                    host=host,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    result_logger=result_logger,
                    min_budget=args.min_budget,
                    max_budget=args.max_budget)
        try:
            res = bohb.run(n_iterations=args.n_iterations)
        finally:
            # Step 4: Shut down the optimizer (and its workers) even when the
            # run fails.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Always release the nameserver once it has been started.
        NS.shutdown()

    # Step 5: Analysis of the returned Result object.
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    info = res.get_runs_by_id(incumbent)

    parameter = id2config[incumbent]['config']
    # Per the HpBandSter Result API, get_runs_by_id sorts runs by ascending
    # budget and the incumbent is selected on the largest budget, so the last
    # entry is the run that made this configuration the incumbent.
    best_run = info[-1]
    min_error = best_run['loss']
    feature_importance_dict = best_run['info']

    with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    return parameter, min_error, feature_importance_dict
Example #25
0
def main():
    """Command-line entry point: run as a BOHB worker or as the master.

    With ``--worker`` the process sleeps, loads the nameserver credentials
    from the shared directory, serves in the foreground and exits.
    Otherwise it writes the configuration space to ``configspace.json``,
    starts a nameserver plus one background worker, runs BOHB, pickles the
    result to ``results.pkl`` and shuts everything down.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument(
        '--n_iterations',
        default=4,
        type=int,
        help='Number of iterations performed by the optimizer')
    cli.add_argument(
        '--worker',
        action='store_true',
        help='Flag to turn this into a worker process')
    cli.add_argument(
        '--run_id',
        type=str,
        help='A unique run id for this optimization run. An easy option is to use'
        ' the job id of the clusters scheduler.')
    cli.add_argument(
        '--shared_directory',
        type=str,
        help='A directory that is accessible for all processes, e.g. a NFS share.')
    cli.add_argument(
        '--interface',
        default="eth1",
        type=str,
        help='Which network interface to use')
    opts = cli.parse_args()

    shared_dir = opts.shared_directory

    # The shared directory may already exist (e.g. created by another process).
    try:
        os.mkdir(shared_dir)
    except FileExistsError:
        pass

    # Master and workers both resolve their own host from the interface name.
    host = hpns.nic_name_to_host(opts.interface)

    if opts.worker:
        # Artificial delay so the nameserver is already running.
        time.sleep(60)
        remote_worker = MyWorker(run_id=opts.run_id, host=host)
        remote_worker.load_nameserver_credentials(working_directory=shared_dir)
        remote_worker.run(background=False)
        exit(0)

    # Persist the configuration space next to the results.
    cs = MyWorker.get_configspace()
    configspace_file = os.path.join(shared_dir, 'configspace.json')
    with open(configspace_file, "w") as out:
        out.write(pcs_out.write(cs))

    result_logger = hpres.json_result_logger(directory=shared_dir,
                                             overwrite=True)

    # port=0 lets the nameserver choose a free port.
    name_server = hpns.NameServer(run_id=opts.run_id,
                                  host=host,
                                  port=0,
                                  working_directory=shared_dir)
    ns_host, ns_port = name_server.start()

    # Connection settings shared by the local worker and the optimizer.
    endpoint = dict(run_id=opts.run_id,
                    host=host,
                    nameserver=ns_host,
                    nameserver_port=ns_port)

    local_worker = MyWorker(**endpoint)
    local_worker.run(background=True)

    optimizer = BOHB(configspace=cs,
                     eta=3,
                     result_logger=result_logger,
                     min_budget=1,
                     max_budget=9,
                     **endpoint)
    res = optimizer.run(n_iterations=opts.n_iterations, min_n_workers=1)

    # Store the Result object for later analysis.
    results_file = os.path.join(shared_dir, 'results.pkl')
    with open(results_file, 'wb') as out:
        pickle.dump(res, out)

    # Shut down the master (with its workers) and then the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()
Example #26
0
def main():
    """Tune Tensorforce hyperparameters with the BOHB optimizer.

    Parses the command line, starts a local nameserver and one background
    ``TensorforceWorker``, runs BOHB for ``--num-iterations`` iterations,
    pickles the result to ``results.pkl`` in the output directory and prints
    a short summary of the incumbent configuration.

    The optimizer and the nameserver are shut down even when the run fails.
    """
    parser = argparse.ArgumentParser(
        description=
        'Tensorforce hyperparameter tuner, using BOHB optimizer (Bayesian Optimization '
        'and Hyperband)')
    # Environment arguments (from run.py)
    parser.add_argument(
        '-e',
        '--environment',
        type=str,
        help='Environment (name, configuration JSON file, or library module)')
    parser.add_argument(
        '-l',
        '--level',
        type=str,
        default=None,
        help='Level or game id, like `CartPole-v1`, if supported')
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help='Maximum number of timesteps per episode')
    parser.add_argument(
        '--import-modules',
        type=str,
        default=None,
        help='Import comma-separated modules required for environment')
    # Runner arguments (from run.py)
    parser.add_argument('-n',
                        '--episodes',
                        type=int,
                        help='Number of episodes')
    parser.add_argument(
        '-p',
        '--num-parallel',
        type=int,
        default=None,
        help='Number of environment instances to execute in parallel')
    # Tuner arguments
    parser.add_argument(
        '-r',
        '--runs-per-round',
        type=str,
        default='1,2,5,10',
        help=
        'Comma-separated number of runs per optimization round, each with a successively '
        'smaller number of candidates')
    parser.add_argument(
        '-s',
        '--selection-factor',
        type=int,
        default=3,
        help=
        'Selection factor n, meaning that one out of n candidates in each round advances to '
        'the next optimization round')
    parser.add_argument(
        '-i',
        '--num-iterations',
        type=int,
        default=1,
        help=
        'Number of optimization iterations, each consisting of a series of optimization '
        'rounds with an increasingly reduced candidate pool')
    parser.add_argument('-d',
                        '--directory',
                        type=str,
                        default='tuner',
                        help='Output directory')
    parser.add_argument('--restore',
                        type=str,
                        default=None,
                        help='Restore from given directory')
    parser.add_argument('--id',
                        type=str,
                        default='worker',
                        help='Unique worker id')
    args = parser.parse_args()

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = dict(environment=args.environment)
    if args.level is not None:
        environment['level'] = args.level

    # Everything runs on the local machine. The former `if False:` branch
    # that resolved a network interface was unreachable and has been removed.
    host = 'localhost'
    port = None

    runs_per_round = tuple(int(x) for x in args.runs_per_round.split(','))
    num_rounds = len(runs_per_round)
    print('Bayesian Optimization and Hyperband optimization')
    print(
        f'{args.num_iterations} iterations of each {len(runs_per_round)} rounds:'
    )
    for n, num_runs in enumerate(runs_per_round, start=1):
        # The candidate pool shrinks by the selection factor every round.
        num_candidates = round(math.pow(args.selection_factor, num_rounds - n))
        print(f'round {n}: {num_candidates} candidates, each {num_runs} runs')
    print()

    server = NameServer(run_id=args.id,
                        working_directory=args.directory,
                        host=host,
                        port=port)
    nameserver, nameserver_port = server.start()
    try:
        worker = TensorforceWorker(
            environment=environment,
            max_episode_timesteps=args.max_episode_timesteps,
            num_episodes=args.episodes,
            base=args.selection_factor,
            runs_per_round=runs_per_round,
            num_parallel=args.num_parallel,
            run_id=args.id,
            nameserver=nameserver,
            nameserver_port=nameserver_port,
            host=host)
        worker.run(background=True)

        if args.restore is None:
            previous_result = None
        else:
            previous_result = logged_results_to_HBS_result(
                directory=args.restore)

        result_logger = json_result_logger(directory=args.directory,
                                           overwrite=True)

        # The maximum budget is selection_factor ** (rounds - 1), matching the
        # candidate count printed for the first round above.
        optimizer = BOHB(configspace=worker.get_configspace(),
                         eta=args.selection_factor,
                         min_budget=0.9,
                         max_budget=math.pow(args.selection_factor,
                                             num_rounds - 1),
                         run_id=args.id,
                         working_directory=args.directory,
                         nameserver=nameserver,
                         nameserver_port=nameserver_port,
                         host=host,
                         result_logger=result_logger,
                         previous_result=previous_result)
        try:
            results = optimizer.run(n_iterations=args.num_iterations)
        finally:
            # Stop the master and its workers even if the run raised.
            optimizer.shutdown(shutdown_workers=True)
    finally:
        # Always release the nameserver once it has been started.
        server.shutdown()

    with open(os.path.join(args.directory, 'results.pkl'), 'wb') as filehandle:
        pickle.dump(results, filehandle)

    print('Best found configuration: {}'.format(
        results.get_id2config_mapping()[results.get_incumbent_id()]['config']))
    print('Runs:',
          results.get_runs_by_id(config_id=results.get_incumbent_id()))
    print('A total of {} unique configurations where sampled.'.format(
        len(results.get_id2config_mapping())))
    print('A total of {} runs where executed.'.format(
        len(results.get_all_runs())))
Example #27
0
    def __init__(self, num_workers, num_iterations, run_id, array_id,
                 working_dir, nic_name, network, min_budget, max_budget, eta):
        """Start either the BOHB master (``array_id == 1``) or a worker.

        The master starts a nameserver and one background ``Slave``, runs
        BOHB once at least ``num_workers`` workers are online, pickles the
        result to ``results.pkl`` in ``working_dir`` and shuts everything
        down. Any other ``array_id`` creates a ``Slave`` that keeps retrying
        until the nameserver credentials can be loaded from ``working_dir``
        and then serves in the foreground.
        """
        features, _targets, categorical_mask = model.get_dataset()
        feature_kinds = determine_feature_type(categorical_mask)
        n_features = features.shape[1]

        # The configuration space is built for both roles, as before.
        if network == 'fcresnet':
            search_space = get_fixed_conditional_fcresnet_config(
                n_features,
                feature_kinds,
                num_res_blocks=4,
                super_blocks=2,
                nr_units=64)
        else:
            search_space = get_fixed_conditional_fc_config(
                n_features, feature_kinds, max_nr_layers=9)

        if array_id != 1:
            # Worker role: create the Slave, find the nameserver, then serve.
            host = hpns.nic_name_to_host(nic_name)
            slave = Slave(run_id=run_id, host=host)

            credentials_loaded = False
            while not credentials_loaded:
                try:
                    slave.load_nameserver_credentials(working_dir)
                except RuntimeError:
                    # Credentials are not available yet; try again.
                    continue
                credentials_loaded = True

            # Run the worker in the foreground.
            slave.run(background=False)
            return

        # Master role.
        json_logger = hpres.json_result_logger(directory=working_dir,
                                               overwrite=True)

        # Starting the nameserver stores the connection information in
        # working_dir, where the workers look for it.
        name_server = hpns.NameServer(run_id=run_id,
                                      nic_name=nic_name,
                                      working_directory=working_dir)
        ns_host, ns_port = name_server.start()

        # A worker also runs on the master node, in the background.
        local_slave = Slave(nameserver=ns_host,
                            nameserver_port=ns_port,
                            run_id=run_id)
        local_slave.run(background=True)

        optimizer = BOHB(configspace=search_space,
                         run_id=run_id,
                         eta=eta,
                         min_budget=min_budget,
                         max_budget=max_budget,
                         host=ns_host,
                         nameserver=ns_host,
                         result_logger=json_logger,
                         nameserver_port=ns_port,
                         ping_interval=3600)

        # The run waits until num_workers workers are online before starting.
        outcome = optimizer.run(n_iterations=num_iterations,
                                min_n_workers=num_workers)

        # Pickle the result for later analysis.
        results_path = os.path.join(working_dir, 'results.pkl')
        with open(results_path, 'wb') as out:
            pickle.dump(outcome, out)

        # Shut down all workers, then the nameserver.
        optimizer.shutdown(shutdown_workers=True)
        name_server.shutdown()
Example #28
0
# Remaining command-line options: network interface, run id and the
# directory shared between all processes.
parser.add_argument(
    '--nic_name',
    default="en4",
    type=str,
    help='Which network interface to use for communication.',
)
parser.add_argument(
    '--run_id',
    default=1,
    type=int,
    help='run ID.',
)
parser.add_argument(
    '--shared_directory',
    default="output",
    type=str,
    help='A directory that is accessible for all processes, e.g. a NFS share.',
)
#parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')

args = parser.parse_args()

# Every process resolves its own host from the chosen network interface.
host = hpns.nic_name_to_host(args.nic_name)

# NOTE(review): `host_name` is not defined in the visible part of this script
# (only `host` is) - confirm it is assigned earlier in the file.
prefix = "syn1" if host_name == "synaptomes1" else "test_run"

outputName = f"{prefix}_openml_d_{str(args.openml_dataid)}"

# Step 1: Start a nameserver on the host resolved above; port=0 requests a
# random open port.
nameserver_options = dict(
    run_id=args.run_id,
    host=host,
    port=0,
    working_directory=args.shared_directory,
)
NS = hpns.NameServer(**nameserver_options)
Example #29
0
def _select_worker_class(path):
    """Return the worker class whose model keyword occurs in ``path``.

    Raises:
        ValueError: if ``path`` contains none of the known model keywords.
    """
    if 'dnn' in path:
        from model_workers import KerasWorker as worker
    elif 'random_forest' in path:
        from model_workers import RandomForestWorker as worker
    elif 'xgboost' in path:
        from model_workers import XGBoostWorker as worker
    elif 'svm' in path:
        from model_workers import SVMWorker as worker
    elif 'extratrees' in path:
        from model_workers import ExtraTreesWorker as worker
    else:
        raise ValueError(
            'Cannot determine the model type from path {!r}; expected it to '
            'contain one of: dnn, random_forest, xgboost, svm, '
            'extratrees'.format(path))
    return worker


def optimize_in_model(path, args, n_gram, tokens):
    """Run one BOHB optimization per article for the model stored under ``path``.

    The worker class is chosen from keywords in ``path``. For every article a
    nameserver is started on the first network interface that resolves, a
    local worker and the optimizer are run, the result is pickled to
    ``results.pkl`` in ``path`` and the best result is stored and moved to
    storage.

    When ``args.worker`` is set, the process instead runs a foreground worker
    for the first article and then terminates the interpreter with
    ``exit(0)``.

    Args:
        path: model directory; also the working directory for the
            nameserver, the result logger and the pickled results.
        args: parsed options; reads ``worker``, ``run_id``, ``flavor``,
            ``min_budget``, ``max_budget`` and ``n_iterations``.
        n_gram: forwarded to the result-storage helpers.
        tokens: forwarded to the result-storage helpers.

    Raises:
        ValueError: if the model type cannot be derived from ``path``.
        RuntimeError: if no network interface yields a usable host.
    """
    articles = ['1', '2', '3', '5', '6', '8', '10', '11', '13', '34', 'p1']

    # The model type depends only on `path`, so resolve it once up front.
    # Previously an unknown path surfaced as a NameError only after a
    # nameserver had already been started.
    worker = _select_worker_class(path)

    for article in articles:
        for inter in ni.interfaces():
            try:
                # Every process has to lookup the hostname
                host = hpns.nic_name_to_host(inter)

                if args.worker:
                    import time
                    # Short artificial delay to make sure the nameserver is
                    # already running.
                    time.sleep(5)
                    w = worker(run_id=args.run_id,
                               host=host,
                               timeout=120,
                               article=article,
                               flavor=args.flavor)
                    w.load_nameserver_credentials(working_directory=path)
                    w.run(background=False)
                    exit(0)

                # Log live results (results.json and configs.json) so that
                # intermediate results of long runs can be inspected.
                result_logger = hpres.json_result_logger(directory=path,
                                                         overwrite=True)

                # Start a nameserver:
                NS = hpns.NameServer(run_id=args.run_id,
                                     host=host,
                                     port=0,
                                     working_directory=path)
                ns_host, ns_port = NS.start()
                break
            except gaierror:
                # This interface cannot be resolved; try the next one.
                continue
        else:
            # No interface worked. Without this guard the code below would
            # hit a NameError or reuse the previous article's nameserver,
            # which has already been shut down.
            raise RuntimeError(
                'Could not start a nameserver on any network interface')

        try:
            # Start local worker
            w = worker(run_id=args.run_id,
                       host=host,
                       nameserver=ns_host,
                       nameserver_port=ns_port,
                       timeout=120,
                       article=article,
                       flavor=args.flavor,
                       id=1)
            w.run(background=True)

            # Run an optimizer
            bohb = BOHB(
                configspace=worker.get_configspace(),
                run_id=args.run_id,
                host=host,
                nameserver=ns_host,
                nameserver_port=ns_port,
                result_logger=result_logger,
                min_budget=args.min_budget,
                max_budget=args.max_budget,
            )
            try:
                res = bohb.run(n_iterations=args.n_iterations)

                # store results
                with open(os.path.join(path, 'results.pkl'), 'wb') as fh:
                    pickle.dump(res, fh)
            finally:
                # Stop the optimizer and its workers even if the run failed.
                bohb.shutdown(shutdown_workers=True)
        finally:
            # Always release this article's nameserver.
            NS.shutdown()

        # Keep track of the best results
        store_best_result_and_config(model_path=path,
                                     article=article,
                                     n_gram=n_gram,
                                     tokens=tokens,
                                     flavor=args.flavor,
                                     preprocessed=0,
                                     pipeline=None)
        move_results_to_storage(n_gram=n_gram,
                                tokens=tokens,
                                article=article,
                                flavor=args.flavor,
                                model=path.split('/')[-2])