Exemple #1
0
def test_random_search(sc):

    sp = Searchspace(argument_param=("DOUBLE", [1, 5]))

    rs = RandomSearch(5, sp, [])

    exp_result = {"argument_param": "DOUBLE"}

    assert sp.names() == exp_result
    assert rs.num_trials == 5
    assert rs.searchspace == sp
Exemple #2
0
def test_randomsearch(sc):
    def train(model, train_set, test_set, hparams, reporter):

        if "argument_param" in hparams.keys():
            print("Entered train function with param {}".format(
                hparams["argument_param"]))

        for i in range(5):
            acc = i + random.random()
            reporter.broadcast(metric=acc)
            reporter.log("Metric: {}".format(acc))

            # do something with HP.
            if "argument_param" in hparams.keys():
                time.sleep(hparams["argument_param"])

        return acc

    sp = Searchspace(argument_param=("DOUBLE", [1, 5]))

    config = HyperparameterOptConfig(
        searchspace=sp,
        optimizer="randomsearch",
        direction="max",
        num_trials=5,
        name="test",
        hb_interval=1,
        es_interval=10,
    )

    result = experiment.lagom(train_fn=train, config=config)
    assert type(result) == type({})

    test_dt_tensorflow(sc)
Exemple #3
0
def test_randomsearch(sc):

    sp = Searchspace(argument_param=("DOUBLE", [1, 5]))

    def train(argument_param, reporter):

        print("Entered train function with param {}".format(argument_param))

        for i in range(5):
            acc = i + random.random()
            reporter.broadcast(metric=acc)
            reporter.log("Metric: {}".format(acc))

            time.sleep(argument_param)

        return acc

    result = experiment.lagom(
        train,
        sp,
        optimizer="randomsearch",
        direction="max",
        num_trials=25,
        name="test",
        hb_interval=1,
        es_interval=10,
    )
    assert type(result) == type({})
Exemple #4
0
def test_randomsearch_init():

    sp = Searchspace(argument_param=("DOUBLE", [1, 5]), param2=("integer", [3, 4]))

    rs = RandomSearch(5, sp, [])

    assert rs.num_trials == 5
    assert rs.searchspace == sp
Exemple #5
0
def test_rs_initialize2():

    sp = Searchspace(argument_param=("DISCRETE", [1, 5]))

    rs = RandomSearch(5, sp, [])

    with pytest.raises(NotImplementedError) as excinfo:
        rs.initialize()
    assert "Searchspace needs at least one continuous parameter" in str(excinfo.value)
Exemple #6
0
def test_randomsearch_initialize():

    sp = Searchspace(argument_param=("DOUBLE", [1, 5]), param2=("integer", [3, 4]))

    rs = RandomSearch(5, sp, [])

    rs.initialize()

    assert len(rs.trial_buffer) == 5
    def _init_searchspace(searchspace: Searchspace) -> Searchspace:
        """Checks for a valid searchspace config.

        :param searchspace: The searchspace initialization argument.

        :raises ValueError: If the searchspace is invalid.

        :returns: The valid searchspace."""
        if not isinstance(searchspace, Searchspace) or searchspace is None:
            raise ValueError(
                """The experiment's search space should be an instance of
                 maggy.Searchspace, but it is {} (of type '{}').""".format(
                    str(searchspace),
                    type(searchspace).__name__))
        return searchspace if isinstance(searchspace,
                                         Searchspace) else Searchspace()
    def _init_controller(optimizer: Union[str, AbstractOptimizer],
                         searchspace: Searchspace) -> AbstractOptimizer:
        """Checks for a valid optimizer config.

        :param optimizer: The optimizer to be checked.
        :param searchspace: The searchspace for hyperparameter optimization.

        :raises KeyError: If the optimizer is given as a string, but is not in
            the dict of supported optimizers.
        :raises TypeError: If the searchspace and optimizer mismatch or the
            optimizer is of wrong type.

        :returns: The validated Optimizer
        """
        optimizer = ("none" if optimizer is None else optimizer
                     )  # Convert None key to usable string.
        if optimizer == "none" and not searchspace.names():
            optimizer = "faulty_none"
        if isinstance(optimizer, str):
            try:
                return HyperparameterOptDriver.controller_dict[
                    optimizer.lower()]()
            except KeyError as exc:
                raise KeyError(
                    "Unknown Optimizer. Can't initialize experiment driver."
                ) from exc
            except TypeError as exc:
                raise TypeError(
                    "Searchspace has to be empty or None to use without Optimizer."
                ) from exc
        elif isinstance(optimizer, AbstractOptimizer):
            print("Custom Optimizer initialized.")
            return optimizer
        else:
            raise TypeError(
                "The experiment's optimizer should either be an string indicating the name "
                "of an implemented optimizer (such as 'randomsearch') or an instance of "
                "maggy.optimizer.AbstractOptimizer, "
                "but it is {0} (of type '{1}').".format(
                    str(optimizer),
                    type(optimizer).__name__))
Exemple #9
0
    def __init__(self, experiment_type, **kwargs):

        global driver_secret

        # COMMON EXPERIMENT SETUP
        self._final_store = []
        self._trial_store = {}
        self.num_executors = kwargs.get("num_executors")
        self._message_q = queue.Queue()
        self.name = kwargs.get("name")
        self.experiment_done = False
        self.worker_done = False
        self.hb_interval = kwargs.get("hb_interval")
        self.description = kwargs.get("description")
        self.experiment_type = experiment_type
        self.es_interval = kwargs.get("es_interval")
        self.es_min = kwargs.get("es_min")

        # TYPE-SPECIFIC EXPERIMENT SETUP
        if self.experiment_type == "optimization":
            # set up an optimization experiment

            self.num_trials = kwargs.get("num_trials", 1)

            searchspace = kwargs.get("searchspace")
            if isinstance(searchspace, Searchspace):
                self.searchspace = searchspace
            elif searchspace is None:
                self.searchspace = Searchspace()
            else:
                raise Exception(
                    "The experiment's search space should be an instance of maggy.Searchspace, "
                    "but it is {0} (of type '{1}').".format(
                        str(searchspace),
                        type(searchspace).__name__))

            optimizer = kwargs.get("optimizer")

            if optimizer is None:
                if len(self.searchspace.names()) == 0:
                    self.optimizer = SingleRun()
                else:
                    raise Exception(
                        "Searchspace has to be empty or None to use without optimizer"
                    )
            elif isinstance(optimizer, str):
                if optimizer.lower() == "randomsearch":
                    self.optimizer = RandomSearch()
                elif optimizer.lower() == "asha":
                    self.optimizer = Asha()
                elif optimizer.lower() == "none":
                    if len(self.searchspace.names()) == 0:
                        self.optimizer = SingleRun()
                    else:
                        raise Exception(
                            "Searchspace has to be empty or None to use without Optimizer."
                        )
                else:
                    raise Exception(
                        "Unknown Optimizer. Can't initialize experiment driver."
                    )
            elif isinstance(optimizer, AbstractOptimizer):
                self.optimizer = optimizer
                print("Custom Optimizer initialized.")
            else:
                raise Exception(
                    "The experiment's optimizer should either be an string indicating the name "
                    "of an implemented optimizer (such as 'randomsearch') or an instance of "
                    "maggy.optimizer.AbstractOptimizer, "
                    "but it is {0} (of type '{1}').".format(
                        str(optimizer),
                        type(optimizer).__name__))

            direction = kwargs.get("direction", "max")
            if isinstance(direction,
                          str) and direction.lower() in ["min", "max"]:
                self.direction = direction.lower()
            else:
                raise Exception(
                    "The experiment's direction should be an string (either 'min' or 'max') "
                    "but it is {0} (of type '{1}').".format(
                        str(direction),
                        type(direction).__name__))

            es_policy = kwargs.get("es_policy")
            if isinstance(es_policy, str):
                if es_policy.lower() == "median":
                    self.earlystop_check = MedianStoppingRule.earlystop_check
                elif es_policy.lower() == "none":
                    self.earlystop_check = NoStoppingRule.earlystop_check
                else:
                    raise Exception(
                        "The experiment's early stopping policy should either be a string ('median' or 'none') "
                        "or a custom policy that is an instance of maggy.earlystop.AbstractEarlyStop, "
                        "but it is {0} (of type '{1}').".format(
                            str(es_policy),
                            type(es_policy).__name__))
            elif isinstance(es_policy, AbstractEarlyStop):
                self.earlystop_check = es_policy.earlystop_check
                print("Custom Early Stopping policy initialized.")
            else:
                raise Exception(
                    "The experiment's early stopping policy should either be a string ('median' or 'none') "
                    "or a custom policy that is an instance of maggy.earlystop.AbstractEarlyStop, "
                    "but it is {0} (of type '{1}').".format(
                        str(es_policy),
                        type(es_policy).__name__))

            self.es_interval = kwargs.get("es_interval")
            self.es_min = kwargs.get("es_min")

            self.result = {
                "best_val": "n.a.",
                "num_trials": 0,
                "early_stopped": 0
            }

        elif self.experiment_type == "ablation":
            # set up an ablation study experiment
            self.earlystop_check = NoStoppingRule.earlystop_check

            ablation_study = kwargs.get("ablation_study")
            if isinstance(ablation_study, AblationStudy):
                self.ablation_study = ablation_study
            else:
                raise Exception(
                    "The experiment's ablation study configuration should be an instance of "
                    "maggy.ablation.AblationStudy, "
                    "but it is {0} (of type '{1}').".format(
                        str(ablation_study),
                        type(ablation_study).__name__))

            searchspace = kwargs.get("searchspace")
            if not searchspace:
                self.searchspace = Searchspace()
            else:
                raise Exception(
                    "The experiment's search space should be None for ablation experiments, "
                    "but it is {0} (of type '{1}').".format(
                        str(searchspace),
                        type(searchspace).__name__))

            ablator = kwargs.get("ablator")
            if isinstance(ablator, str):
                if ablator.lower() == "loco":
                    self.ablator = LOCO(ablation_study, self._final_store)
                    self.num_trials = self.ablator.get_number_of_trials()
                    if self.num_executors > self.num_trials:
                        self.num_executors = self.num_trials
                else:
                    raise Exception(
                        "The experiment's ablation study policy should either be a string ('loco') "
                        "or a custom policy that is an instance of maggy.ablation.ablation.AbstractAblator, "
                        "but it is {0} (of type '{1}').".format(
                            str(ablator),
                            type(ablator).__name__))
            elif isinstance(ablator, AbstractAblator):
                self.ablator = ablator
                print("Custom Ablator initialized. \n")
            else:
                raise Exception(
                    "The experiment's ablation study policy should either be a string ('loco') "
                    "or a custom policy that is an instance of maggy.ablation.ablation.AbstractAblator, "
                    "but it is {0} (of type '{1}').".format(
                        str(ablator),
                        type(ablator).__name__))

            self.result = {
                "best_val": "n.a.",
                "num_trials": 0,
                "early_stopped": "n.a"
            }
        else:
            raise Exception(
                "Unknown experiment type. experiment_type should be either 'optimization' or 'ablation', "
                "but it is {0}.".format(str(self.experiment_type)))

        # FINALIZE EXPERIMENT SETUP
        self.server = rpc.Server(self.num_executors)
        if not driver_secret:
            driver_secret = self._generate_secret(
                ExperimentDriver.SECRET_BYTES)
        self._secret = driver_secret
        self.job_start = datetime.now()
        self.executor_logs = ""
        self.maggy_log = ""
        self.log_lock = threading.RLock()
        self.log_file = kwargs.get("log_dir") + "/maggy.log"
        self.log_dir = kwargs.get("log_dir")
        self.exception = None

        # Open File desc for HDFS to log
        if not hopshdfs.exists(self.log_file):
            hopshdfs.dump("", self.log_file)
        self.fd = hopshdfs.open_file(self.log_file, flags="w")
Exemple #10
0
class ExperimentDriver(object):

    SECRET_BYTES = 8

    # @Moritz:
    # for now, we infer the experiment type (an optimization experiment or an ablation study)
    # using keyword arguments, and set self.experiment_type to an according string.
    # Some of these arguments are required for any maggy experiment:
    # num_trials, name, num_executors, hb_interval, description, app_dir, log_dir, trial_dir
    # while some are specific to the type of experiment. For example, if the ExperimentDriver constructor
    # is called with a `searchspace` parameter, we infer that it is a hyperparameter optimization task,
    # and if it is called with `ablationstudy` parameter, we infer that it's an ablation study.

    def __init__(self, experiment_type, **kwargs):

        global driver_secret

        # COMMON EXPERIMENT SETUP
        self._final_store = []
        self._trial_store = {}
        self.num_executors = kwargs.get("num_executors")
        self._message_q = queue.Queue()
        self.name = kwargs.get("name")
        self.experiment_done = False
        self.worker_done = False
        self.hb_interval = kwargs.get("hb_interval")
        self.description = kwargs.get("description")
        self.experiment_type = experiment_type
        self.es_interval = kwargs.get("es_interval")
        self.es_min = kwargs.get("es_min")

        # TYPE-SPECIFIC EXPERIMENT SETUP
        if self.experiment_type == "optimization":
            # set up an optimization experiment

            self.num_trials = kwargs.get("num_trials", 1)

            searchspace = kwargs.get("searchspace")
            if isinstance(searchspace, Searchspace):
                self.searchspace = searchspace
            elif searchspace is None:
                self.searchspace = Searchspace()
            else:
                raise Exception(
                    "The experiment's search space should be an instance of maggy.Searchspace, "
                    "but it is {0} (of type '{1}').".format(
                        str(searchspace),
                        type(searchspace).__name__))

            optimizer = kwargs.get("optimizer")

            if optimizer is None:
                if len(self.searchspace.names()) == 0:
                    self.optimizer = SingleRun()
                else:
                    raise Exception(
                        "Searchspace has to be empty or None to use without optimizer"
                    )
            elif isinstance(optimizer, str):
                if optimizer.lower() == "randomsearch":
                    self.optimizer = RandomSearch()
                elif optimizer.lower() == "asha":
                    self.optimizer = Asha()
                elif optimizer.lower() == "none":
                    if len(self.searchspace.names()) == 0:
                        self.optimizer = SingleRun()
                    else:
                        raise Exception(
                            "Searchspace has to be empty or None to use without Optimizer."
                        )
                else:
                    raise Exception(
                        "Unknown Optimizer. Can't initialize experiment driver."
                    )
            elif isinstance(optimizer, AbstractOptimizer):
                self.optimizer = optimizer
                print("Custom Optimizer initialized.")
            else:
                raise Exception(
                    "The experiment's optimizer should either be an string indicating the name "
                    "of an implemented optimizer (such as 'randomsearch') or an instance of "
                    "maggy.optimizer.AbstractOptimizer, "
                    "but it is {0} (of type '{1}').".format(
                        str(optimizer),
                        type(optimizer).__name__))

            direction = kwargs.get("direction", "max")
            if isinstance(direction,
                          str) and direction.lower() in ["min", "max"]:
                self.direction = direction.lower()
            else:
                raise Exception(
                    "The experiment's direction should be an string (either 'min' or 'max') "
                    "but it is {0} (of type '{1}').".format(
                        str(direction),
                        type(direction).__name__))

            es_policy = kwargs.get("es_policy")
            if isinstance(es_policy, str):
                if es_policy.lower() == "median":
                    self.earlystop_check = MedianStoppingRule.earlystop_check
                elif es_policy.lower() == "none":
                    self.earlystop_check = NoStoppingRule.earlystop_check
                else:
                    raise Exception(
                        "The experiment's early stopping policy should either be a string ('median' or 'none') "
                        "or a custom policy that is an instance of maggy.earlystop.AbstractEarlyStop, "
                        "but it is {0} (of type '{1}').".format(
                            str(es_policy),
                            type(es_policy).__name__))
            elif isinstance(es_policy, AbstractEarlyStop):
                self.earlystop_check = es_policy.earlystop_check
                print("Custom Early Stopping policy initialized.")
            else:
                raise Exception(
                    "The experiment's early stopping policy should either be a string ('median' or 'none') "
                    "or a custom policy that is an instance of maggy.earlystop.AbstractEarlyStop, "
                    "but it is {0} (of type '{1}').".format(
                        str(es_policy),
                        type(es_policy).__name__))

            self.es_interval = kwargs.get("es_interval")
            self.es_min = kwargs.get("es_min")

            self.result = {
                "best_val": "n.a.",
                "num_trials": 0,
                "early_stopped": 0
            }

        elif self.experiment_type == "ablation":
            # set up an ablation study experiment
            self.earlystop_check = NoStoppingRule.earlystop_check

            ablation_study = kwargs.get("ablation_study")
            if isinstance(ablation_study, AblationStudy):
                self.ablation_study = ablation_study
            else:
                raise Exception(
                    "The experiment's ablation study configuration should be an instance of "
                    "maggy.ablation.AblationStudy, "
                    "but it is {0} (of type '{1}').".format(
                        str(ablation_study),
                        type(ablation_study).__name__))

            searchspace = kwargs.get("searchspace")
            if not searchspace:
                self.searchspace = Searchspace()
            else:
                raise Exception(
                    "The experiment's search space should be None for ablation experiments, "
                    "but it is {0} (of type '{1}').".format(
                        str(searchspace),
                        type(searchspace).__name__))

            ablator = kwargs.get("ablator")
            if isinstance(ablator, str):
                if ablator.lower() == "loco":
                    self.ablator = LOCO(ablation_study, self._final_store)
                    self.num_trials = self.ablator.get_number_of_trials()
                    if self.num_executors > self.num_trials:
                        self.num_executors = self.num_trials
                else:
                    raise Exception(
                        "The experiment's ablation study policy should either be a string ('loco') "
                        "or a custom policy that is an instance of maggy.ablation.ablation.AbstractAblator, "
                        "but it is {0} (of type '{1}').".format(
                            str(ablator),
                            type(ablator).__name__))
            elif isinstance(ablator, AbstractAblator):
                self.ablator = ablator
                print("Custom Ablator initialized. \n")
            else:
                raise Exception(
                    "The experiment's ablation study policy should either be a string ('loco') "
                    "or a custom policy that is an instance of maggy.ablation.ablation.AbstractAblator, "
                    "but it is {0} (of type '{1}').".format(
                        str(ablator),
                        type(ablator).__name__))

            self.result = {
                "best_val": "n.a.",
                "num_trials": 0,
                "early_stopped": "n.a"
            }
        else:
            raise Exception(
                "Unknown experiment type. experiment_type should be either 'optimization' or 'ablation', "
                "but it is {0}.".format(str(self.experiment_type)))

        # FINALIZE EXPERIMENT SETUP
        self.server = rpc.Server(self.num_executors)
        if not driver_secret:
            driver_secret = self._generate_secret(
                ExperimentDriver.SECRET_BYTES)
        self._secret = driver_secret
        self.job_start = datetime.now()
        self.executor_logs = ""
        self.maggy_log = ""
        self.log_lock = threading.RLock()
        self.log_file = kwargs.get("log_dir") + "/maggy.log"
        self.log_dir = kwargs.get("log_dir")
        self.exception = None

        # Open File desc for HDFS to log
        if not hopshdfs.exists(self.log_file):
            hopshdfs.dump("", self.log_file)
        self.fd = hopshdfs.open_file(self.log_file, flags="w")

    def init(self, job_start):

        self.server_addr = self.server.start(self)

        self.job_start = job_start

        if self.experiment_type == "optimization":
            # Set references to data in optimizer
            self.optimizer.num_trials = self.num_trials
            self.optimizer.searchspace = self.searchspace
            self.optimizer.trial_store = self._trial_store
            self.optimizer.final_store = self._final_store
            self.optimizer.direction = self.direction
            self.optimizer.initialize()
        elif self.experiment_type == "ablation":
            self.ablator.initialize()

        self._start_worker()

    def finalize(self, job_end):

        results = ""

        if self.experiment_type == "optimization":

            _ = self.optimizer.finalize_experiment(self._final_store)

            self.job_end = job_end

            self.duration = experiment_utils._seconds_to_milliseconds(
                self.job_end - self.job_start)

            self.duration_str = experiment_utils._time_diff(
                self.job_start, self.job_end)

            results = ("\n------ " + self.optimizer.name() +
                       " Results ------ direction(" + self.direction + ") \n"
                       "BEST combination " +
                       json.dumps(self.result["best_hp"]) + " -- metric " +
                       str(self.result["best_val"]) + "\n"
                       "WORST combination " +
                       json.dumps(self.result["worst_hp"]) + " -- metric " +
                       str(self.result["worst_val"]) + "\n"
                       "AVERAGE metric -- " + str(self.result["avg"]) + "\n"
                       "EARLY STOPPED Trials -- " +
                       str(self.result["early_stopped"]) + "\n"
                       "Total job time " + self.duration_str + "\n")

        elif self.experiment_type == "ablation":

            _ = self.ablator.finalize_experiment(self._final_store)
            self.job_end = job_end

            self.duration = experiment_utils._seconds_to_milliseconds(
                self.job_end - self.job_start)

            self.duration_str = experiment_utils._time_diff(
                self.job_start, self.job_end)

            results = ("\n------ " + self.ablator.name() +
                       " Results ------ \n" + "BEST Config Excludes " +
                       json.dumps(self.result["best_config"]) + " -- metric " +
                       str(self.result["best_val"]) + "\n" +
                       "WORST Config Excludes " +
                       json.dumps(self.result["worst_config"]) +
                       " -- metric " + str(self.result["worst_val"]) + "\n" +
                       "AVERAGE metric -- " + str(self.result["avg"]) + "\n" +
                       "Total Job Time " + self.duration_str + "\n")

        print(results)

        self._log(results)

        hopshdfs.dump(
            json.dumps(self.result, default=util.json_default_numpy),
            self.log_dir + "/result.json",
        )
        sc = hopsutil._find_spark().sparkContext
        hopshdfs.dump(self.json(sc), self.log_dir + "/maggy.json")

        return self.result

    def get_trial(self, trial_id):
        return self._trial_store[trial_id]

    def add_trial(self, trial):
        self._trial_store[trial.trial_id] = trial

    def add_message(self, msg):
        self._message_q.put(msg)

    def _start_worker(self):
        def _target_function(self):

            try:
                time_earlystop_check = (
                    time.time()
                )  # only used by earlystop-supporting experiments

                while not self.worker_done:
                    trial = None
                    # get a message
                    try:
                        msg = self._message_q.get_nowait()
                    except queue.Empty:
                        msg = {"type": None}

                    if self.earlystop_check != NoStoppingRule.earlystop_check:
                        if (time.time() -
                                time_earlystop_check) >= self.es_interval:
                            time_earlystop_check = time.time()

                            # pass currently running trials to early stop component
                            if len(self._final_store) > self.es_min:
                                self._log("Check for early stopping.")
                                try:
                                    to_stop = self.earlystop_check(
                                        self._trial_store,
                                        self._final_store,
                                        self.direction,
                                    )
                                except Exception as e:
                                    self._log(e)
                                    to_stop = []
                                if len(to_stop) > 0:
                                    self._log(
                                        "Trials to stop: {}".format(to_stop))
                                for trial_id in to_stop:
                                    self.get_trial(trial_id).set_early_stop()

                    # depending on message do the work
                    # 1. METRIC
                    if msg["type"] == "METRIC":
                        # append executor logs if in the message
                        logs = msg.get("logs", None)
                        if logs is not None:
                            with self.log_lock:
                                self.executor_logs = self.executor_logs + logs

                        if msg["trial_id"] is not None and msg[
                                "data"] is not None:
                            self.get_trial(msg["trial_id"]).append_metric(
                                msg["data"])

                    # 2. BLACKLIST the trial
                    elif msg["type"] == "BLACK":
                        trial = self.get_trial(msg["trial_id"])
                        with trial.lock:
                            trial.status = Trial.SCHEDULED
                            self.server.reservations.assign_trial(
                                msg["partition_id"], msg["trial_id"])

                    # 3. FINAL
                    elif msg["type"] == "FINAL":
                        # set status
                        # get trial only once
                        trial = self.get_trial(msg["trial_id"])

                        logs = msg.get("logs", None)
                        if logs is not None:
                            with self.log_lock:
                                self.executor_logs = self.executor_logs + logs

                        # finalize the trial object
                        with trial.lock:
                            trial.status = Trial.FINALIZED
                            trial.final_metric = msg["data"]
                            trial.duration = experiment_utils._seconds_to_milliseconds(
                                time.time() - trial.start)

                        # move trial to the finalized ones
                        self._final_store.append(trial)
                        self._trial_store.pop(trial.trial_id)

                        # update result dictionary
                        self._update_result(trial)
                        # keep for later in case tqdm doesn't work
                        self.maggy_log = self._update_maggy_log()
                        self._log(self.maggy_log)

                        hopshdfs.dump(
                            trial.to_json(),
                            self.log_dir + "/" + trial.trial_id +
                            "/trial.json",
                        )

                        # assign new trial
                        if self.experiment_type == "optimization":
                            trial = self.optimizer.get_suggestion(trial)
                        elif self.experiment_type == "ablation":
                            trial = self.ablator.get_trial(trial)
                        if trial is None:
                            self.server.reservations.assign_trial(
                                msg["partition_id"], None)
                            self.experiment_done = True
                        elif trial == "IDLE":
                            self.add_message({
                                "type":
                                "IDLE",
                                "partition_id":
                                msg["partition_id"],
                                "idle_start":
                                time.time(),
                            })
                            self.server.reservations.assign_trial(
                                msg["partition_id"], None)
                        else:
                            with trial.lock:
                                trial.start = time.time()
                                trial.status = Trial.SCHEDULED
                                self.server.reservations.assign_trial(
                                    msg["partition_id"], trial.trial_id)
                                self.add_trial(trial)

                    # 4. Let executor be idle
                    elif msg["type"] == "IDLE":
                        # execute only every 0.1 seconds but do not block thread
                        if (self.experiment_type == "optimization"
                                and time.time() - msg["idle_start"] > 0.1):
                            trial = self.optimizer.get_suggestion()
                            if trial is None:
                                self.server.reservations.assign_trial(
                                    msg["partition_id"], None)
                                self.experiment_done = True
                            elif trial == "IDLE":
                                # reset timeout
                                msg["idle_start"] = time.time()
                                self.add_message(msg)
                            else:
                                with trial.lock:
                                    trial.start = time.time()
                                    trial.status = Trial.SCHEDULED
                                    self.server.reservations.assign_trial(
                                        msg["partition_id"], trial.trial_id)
                                    self.add_trial(trial)
                        elif self.experiment_type == "optimization":
                            self.add_message(msg)

                    # 4. REG
                    elif msg["type"] == "REG":
                        if self.experiment_type == "optimization":
                            trial = self.optimizer.get_suggestion()
                        elif self.experiment_type == "ablation":
                            trial = self.ablator.get_trial()
                        if trial is None:
                            self.server.reservations.assign_trial(
                                msg["partition_id"], None)
                            self.experiment_done = True
                        elif trial == "IDLE":
                            # reset timeout
                            msg["idle_start"] = time.time()
                            self.add_message(msg)
                        else:
                            with trial.lock:
                                trial.start = time.time()
                                trial.status = Trial.SCHEDULED
                                self.server.reservations.assign_trial(
                                    msg["partition_id"], trial.trial_id)
                                self.add_trial(trial)
            except Exception as exc:
                # Exception can't be propagated to parent thread
                # therefore log the exception and fail experiment
                self._log(exc)
                self.exception = exc
                self.server.stop()

        t = threading.Thread(target=_target_function, args=(self, ))
        t.daemon = True
        t.start()

    def stop(self):
        """Stop the Driver's worker thread and server."""
        self.worker_done = True
        self.server.stop()
        self.fd.flush()
        self.fd.close()

    def json(self, sc):
        """Get all relevant experiment information in JSON format.
        """
        user = None
        if hopsconstants.ENV_VARIABLES.HOPSWORKS_USER_ENV_VAR in os.environ:
            user = os.environ[
                hopsconstants.ENV_VARIABLES.HOPSWORKS_USER_ENV_VAR]

        experiment_json = {
            "project":
            hopshdfs.project_name(),
            "user":
            user,
            "name":
            self.name,
            "module":
            "maggy",
            "app_id":
            str(sc.applicationId),
            "start":
            time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime(self.job_start)),
            "memory_per_executor":
            str(sc._conf.get("spark.executor.memory")),
            "gpus_per_executor":
            str(sc._conf.get("spark.executor.gpus")),
            "executors":
            self.num_executors,
            "logdir":
            self.log_dir,
            # 'versioned_resources': versioned_resources,
            "description":
            self.description,
            "experiment_type":
            self.experiment_type,
        }

        if self.experiment_type == "optimization":
            experiment_json["hyperparameter_space"] = json.dumps(
                self.searchspace.to_dict())
            experiment_json["function"] = self.optimizer.name()
        elif self.experiment_type == "ablation":
            experiment_json["ablation_study"] = json.dumps(
                self.ablation_study.to_dict())
            experiment_json["ablator"] = self.ablator.name()

        if self.experiment_done:
            experiment_json["status"] = "FINISHED"
            experiment_json["finished"] = time.strftime(
                "%Y-%m-%dT%H:%M:%S", time.localtime(self.job_end))
            experiment_json["duration"] = self.duration
            if self.experiment_type == "optimization":
                experiment_json["hyperparameter"] = json.dumps(
                    self.result["best_hp"])
            experiment_json["metric"] = self.result["best_val"]

        else:
            experiment_json["status"] = "RUNNING"

        return json.dumps(experiment_json, default=util.json_default_numpy)

    def _generate_secret(self, nbytes):
        """Generates a secret to be used by all clients during the experiment
        to authenticate their messages with the experiment driver.
        """
        return secrets.token_hex(nbytes=nbytes)

    def _update_result(self, trial):
        """Given a finalized trial updates the current result's best and
        worst trial.
        """

        metric = trial.final_metric
        param_string = trial.params
        trial_id = trial.trial_id

        if self.experiment_type == "optimization":
            # First finalized trial
            if self.result.get("best_id", None) is None:
                self.result = {
                    "best_id": trial_id,
                    "best_val": metric,
                    "best_hp": param_string,
                    "worst_id": trial_id,
                    "worst_val": metric,
                    "worst_hp": param_string,
                    "avg": metric,
                    "metric_list": [metric],
                    "num_trials": 1,
                    "early_stopped": 0,
                }

                if trial.early_stop:
                    self.result["early_stopped"] += 1

                return
            if self.direction == "max":
                if metric > self.result["best_val"]:
                    self.result["best_val"] = metric
                    self.result["best_id"] = trial_id
                    self.result["best_hp"] = param_string
                if metric < self.result["worst_val"]:
                    self.result["worst_val"] = metric
                    self.result["worst_id"] = trial_id
                    self.result["worst_hp"] = param_string
            elif self.direction == "min":
                if metric < self.result["best_val"]:
                    self.result["best_val"] = metric
                    self.result["best_id"] = trial_id
                    self.result["best_hp"] = param_string
                if metric > self.result["worst_val"]:
                    self.result["worst_val"] = metric
                    self.result["worst_id"] = trial_id
                    self.result["worst_hp"] = param_string

        elif self.experiment_type == "ablation":

            # pop function values and trial_type from parameters, since we don't need them
            param_string.pop("dataset_function", None)
            param_string.pop("model_function", None)
            # First finalized trial
            if self.result.get("best_id", None) is None:
                self.result = {
                    "best_id": trial_id,
                    "best_val": metric,
                    "best_config": param_string,
                    "worst_id": trial_id,
                    "worst_val": metric,
                    "worst_config": param_string,
                    "avg": metric,
                    "metric_list": [metric],
                    "num_trials": 1,
                    "early_stopped": 0,
                }
                return

            # for ablation we always consider 'max' as "the direction"
            if metric > self.result["best_val"]:
                self.result["best_id"] = trial_id
                self.result["best_val"] = metric
                self.result["best_config"] = param_string
            elif metric < self.result["worst_val"]:
                self.result["worst_id"] = trial_id
                self.result["worst_val"] = metric
                self.result["worst_config"] = param_string

        # update results and average regardless of experiment type
        self.result["metric_list"].append(metric)
        self.result["num_trials"] += 1
        self.result["avg"] = sum(self.result["metric_list"]) / float(
            len(self.result["metric_list"]))

        if trial.early_stop:
            self.result["early_stopped"] += 1

    def _update_maggy_log(self):
        """Creates the status of a maggy experiment with a progress bar.
        """
        finished = self.result["num_trials"]
        log = ""

        if self.experiment_type == "optimization":
            log = ("Maggy Optimization " + str(finished) + "/" +
                   str(self.num_trials) + " (" +
                   str(self.result["early_stopped"]) + ") " +
                   util._progress_bar(finished, self.num_trials) + " - BEST " +
                   json.dumps(self.result["best_hp"]) + " - metric " +
                   str(self.result["best_val"]))

        elif self.experiment_type == "ablation":
            log = ("Maggy Ablation " + str(finished) + "/" +
                   str(self.num_trials) +
                   util._progress_bar(finished, self.num_trials) +
                   " - BEST Excludes" +
                   json.dumps(self.result["best_config"]) + " - metric " +
                   str(self.result["best_val"]))

        return log

    def _get_logs(self):
        """Return current experiment status and executor logs to send them to
        spark magic.
        """
        with self.log_lock:
            temp = self.executor_logs
            # clear the executor logs since they are being sent
            self.executor_logs = ""
            return self.result, temp

    def _log(self, log_msg):
        """Logs a string to the maggy driver log file.
        """
        msg = datetime.now().isoformat() + ": " + str(log_msg)
        self.fd.write((msg + "\n").encode())