Example #1
    def test_timeout_without_job_cancellation(self):
        timeout = 4
        spark_trials = SparkTrials(parallelism=1, timeout=timeout)
        spark_trials._spark_supports_job_cancelling = False

        def fn(x):
            time.sleep(0.5)
            return x

        with patch_logger('hyperopt-spark', logging.DEBUG) as output:
            fmin(fn=fn,
                 space=hp.uniform('x', -1, 1),
                 algo=anneal.suggest,
                 max_evals=10,
                 trials=spark_trials,
                 max_queue_len=1,
                 show_progressbar=False,
                 return_argmin=False)
            log_output = output.getvalue().strip()

            self.assertTrue(spark_trials._fmin_cancelled)
            self.assertEqual(spark_trials._fmin_cancelled_reason,
                             "fmin run timeout")
            self.assertGreater(spark_trials.count_successful_trials(), 0)
            self.assertGreater(spark_trials.count_cancelled_trials(), 0)
            self.assertIn(
                "fmin is cancelled, so new trials will not be launched",
                log_output,
                """ "fmin is cancelled, so new trials will not be launched" missing from log:
                {log_output}""".format(log_output=log_output))
            self.assertIn(
                "SparkTrials will block", log_output,
                """ "SparkTrials will block" missing from log: {log_output}""".
                format(log_output=log_output))
            self.assert_task_succeeded(log_output, 0)
Example #2
    def test_all_failed_trials(self):
        spark_trials = SparkTrials(parallelism=1)
        with patch_logger("hyperopt-spark", logging.DEBUG) as output:
            fmin(
                fn=fn_succeed_within_range,
                space=hp.uniform("x", 5, 10),
                algo=anneal.suggest,
                max_evals=1,
                trials=spark_trials,
                return_argmin=False,
            )
            log_output = output.getvalue().strip()

            self.assertEqual(spark_trials.count_failed_trials(), 1)
            self.assert_task_failed(log_output, 0)

        spark_trials = SparkTrials(parallelism=4)
        # Here return_argmin is True (by default) and an exception should be thrown
        with self.assertRaisesRegexp(Exception,
                                     "There are no evaluation tasks"):
            fmin(
                fn=fn_succeed_within_range,
                space=hp.uniform("x", 5, 8),
                algo=anneal.suggest,
                max_evals=2,
                trials=spark_trials,
            )
Example #3
    def test_task_maxFailures_warning(self):
        # With quick trials, do not print warning.
        with patch_logger('hyperopt-spark', logging.DEBUG) as output:
            fmin(fn=fn_succeed_within_range,
                 space=hp.uniform('x', -1, 1),
                 algo=anneal.suggest,
                 max_evals=1,
                 trials=SparkTrials())
            log_output = output.getvalue().strip()
            self.assertNotIn(
                "spark.task.maxFailures", log_output,
                """ "spark.task.maxFailures" warning should not appear in log: {log_output}"""
                .format(log_output=log_output))

        # With slow trials, print warning.
        ORIG_LONG_TRIAL_DEFINITION_SECONDS = _SparkFMinState._LONG_TRIAL_DEFINITION_SECONDS
        try:
            _SparkFMinState._LONG_TRIAL_DEFINITION_SECONDS = 0
            with patch_logger('hyperopt-spark', logging.DEBUG) as output:
                fmin(fn=fn_succeed_within_range,
                     space=hp.uniform('x', -1, 1),
                     algo=anneal.suggest,
                     max_evals=1,
                     trials=SparkTrials())
                log_output = output.getvalue().strip()
                self.assertIn(
                    "spark.task.maxFailures", log_output,
                    """ "spark.task.maxFailures" warning missing from log: {log_output}"""
                    .format(log_output=log_output))
        finally:
            _SparkFMinState._LONG_TRIAL_DEFINITION_SECONDS = ORIG_LONG_TRIAL_DEFINITION_SECONDS
Example #4
    def test_trial_run_info(self):
        spark_trials = SparkTrials(parallelism=4)

        with patch_logger("hyperopt-spark") as output:
            fmin(
                fn=fn_succeed_within_range,
                space=hp.uniform("x", -5, 5),
                algo=anneal.suggest,
                max_evals=8,
                return_argmin=False,
                trials=spark_trials,
                rstate=np.random.RandomState(99),
            )
            self.check_run_status(spark_trials,
                                  output,
                                  num_total=8,
                                  num_success=7,
                                  num_failure=1)

        expected_result = {"loss": 1.0, "status": "ok"}
        for trial in spark_trials._dynamic_trials:
            if trial["state"] == base.JOB_STATE_DONE:
                self.assertEqual(
                    trial["result"],
                    expected_result,
                    "Wrong result has been saved: Expected {e} but got {r}.".
                    format(e=expected_result, r=trial["result"]),
                )
            elif trial["state"] == base.JOB_STATE_ERROR:
                err_message = trial["misc"]["error"][1]
                self.assertIn(
                    "RuntimeError",
                    err_message,
                    "Missing {e} in {r}.".format(e="RuntimeError",
                                                 r=err_message),
                )
                self.assertIn(
                    "Traceback (most recent call last)",
                    err_message,
                    "Missing {e} in {r}.".format(e="Traceback", r=err_message),
                )

        num_success = spark_trials.count_by_state_unsynced(base.JOB_STATE_DONE)
        self.assertEqual(
            num_success,
            7,
            "Wrong number of successful trial runs: Expected {e} but got {r}.".
            format(e=7, r=num_success),
        )
        num_failure = spark_trials.count_by_state_unsynced(
            base.JOB_STATE_ERROR)
        self.assertEqual(
            num_failure,
            1,
            "Wrong number of failed trial runs: Expected {e} but got {r}.".
            format(e=1, r=num_failure),
        )
Example #5
    def test_invalid_timeout(self):
        with self.assertRaisesRegexp(
                Exception,
                "timeout argument should be None or a positive value. Given value: -1",
        ):
            SparkTrials(parallelism=4, timeout=-1)
        with self.assertRaisesRegexp(
                Exception,
                "timeout argument should be None or a positive value. Given value: True",
        ):
            SparkTrials(parallelism=4, timeout=True)
Example #6
    def test_parallelism_arg(self):
        default_parallelism = 2

        # Test requested_parallelism is None or negative values.
        for requested_parallelism in [None, -1]:
            with patch_logger("hyperopt-spark") as output:
                parallelism = SparkTrials._decide_parallelism(
                    requested_parallelism=requested_parallelism,
                    spark_default_parallelism=default_parallelism,
                )
                self.assertEqual(
                    parallelism,
                    default_parallelism,
                    "Failed to set parallelism to be default parallelism ({p})"
                    " ({e})".format(p=parallelism, e=default_parallelism),
                )
                log_output = output.getvalue().strip()
                self.assertIn(
                    "Because the requested parallelism was None or a non-positive value, "
                    "parallelism will be set to ({d})".format(d=default_parallelism),
                    log_output,
                    """set to default parallelism missing from log: {log_output}""".format(
                        log_output=log_output
                    ),
                )

        # Test requested_parallelism exceeds hard cap
        with patch_logger("hyperopt-spark") as output:
            parallelism = SparkTrials._decide_parallelism(
                requested_parallelism=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED + 1,
                spark_default_parallelism=default_parallelism,
            )
            self.assertEqual(
                parallelism,
                SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED,
                "Failed to limit parallelism ({p}) to MAX_CONCURRENT_JOBS_ALLOWED ({e})".format(
                    p=parallelism, e=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED
                ),
            )
            log_output = output.getvalue().strip()
            self.assertIn(
                "SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED ({c})".format(
                    c=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED
                ),
                log_output,
                """MAX_CONCURRENT_JOBS_ALLOWED value missing from log: {log_output}""".format(
                    log_output=log_output
                ),
            )
Example #7
    def test_pin_thread_on(self):
        if not self._pin_mode_enabled:
            raise unittest.SkipTest()

        spark_trials = SparkTrials(parallelism=2)
        self.assertTrue(spark_trials._spark_pinned_threads_enabled)
        self.assertTrue(spark_trials._spark_supports_job_cancelling)
        fmin(
            fn=lambda x: x + 1,
            space=hp.uniform("x", -1, 1),
            algo=rand.suggest,
            max_evals=5,
            trials=spark_trials,
        )
        self.assertEqual(spark_trials.count_successful_trials(), 5)
Example #8
def train_keras_model(train_desc,
                      test_desc,
                      train_cat,
                      test_cat,
                      distributed=False,
                      shop="all"):
    with mlflow.start_run(run_name="keras", nested=True):
        if distributed:
            from hyperopt import SparkTrials
            trials = SparkTrials()
        else:
            trials = Trials()
        run_keras = RunKeras(train_desc, test_desc, train_cat, test_cat)
        argmin = fmin(run_keras.keras_model,
                      get_search_space(),
                      algo=tpe.suggest,
                      max_evals=10,
                      show_progressbar=True,
                      trials=trials)
        best_params = space_eval(get_search_space(), argmin)
        best_model, f1 = run_keras.train_model(best_params)
        #mlflow.keras.log_model(best_model, 'model')
        mlflow.log_metric("f1", f1)
        #mlflow.log_metric("delta_version", delta_version)
        mlflow.set_tag("shop", shop)
        mlflow.set_tag("model", "keras_classifier")
        return argmin
Example #9
    def test_all_successful_trials(self):
        spark_trials = SparkTrials(parallelism=1)
        with patch_logger('hyperopt-spark', logging.DEBUG) as output:
            fmin(fn=fn_succeed_within_range,
                 space=hp.uniform('x', -1, 1),
                 algo=anneal.suggest,
                 max_evals=1,
                 trials=spark_trials)
            log_output = output.getvalue().strip()

            self.assertEqual(spark_trials.count_successful_trials(), 1)
            self.assertIn(
                "fmin thread exits normally", log_output,
                """Debug info "fmin thread exits normally" missing from log: {log_output}"""
                .format(log_output=log_output))
            self.assert_task_succeeded(log_output, 0)
Example #10
    def test_no_retry_for_long_tasks(self):
        NUM_TRIALS = 2
        output_dir = tempfile.mkdtemp()

        def fn(_):
            with open(os.path.join(output_dir, str(timeit.default_timer())),
                      "w") as f:
                f.write("1")
            raise Exception("Failed!")

        spark_trials = SparkTrials(parallelism=2)
        try:
            fmin(
                fn=fn,
                space=hp.uniform("x", 0, 1),
                algo=anneal.suggest,
                max_evals=NUM_TRIALS,
                trials=spark_trials,
                show_progressbar=False,
                return_argmin=False,
            )
        except BaseException as e:
            self.assertEqual(
                "There are no evaluation tasks, cannot return argmin of task losses.",
                str(e),
            )

        call_count = len(os.listdir(output_dir))
        self.assertEqual(NUM_TRIALS, call_count)
Example #11
    def run_fmin(self,
                 online=True,
                 upload=True,
                 objective=objective_success,
                 max_evals=3,
                 wrap=None,
                 **kwargs):
        project = 'hyperopt-integration-test'
        if wrap == 'mongo':
            trials = MongoTrials('mongo://mongodb:27017/foo_db/jobs',
                                 exp_key=str(uuid.uuid4()))
        elif wrap == 'spark':
            trials = SparkTrials()
        else:
            trials = None
        trials = SigOptTrials(project=project,
                              online=(online and upload),
                              trials=trials)
        try:
            best = fmin(objective,
                        space={
                            'x': hp.uniform('x', -10, 10),
                            'y': hp.uniform('y', -10, 10)
                        },
                        algo=tpe.suggest,
                        max_evals=max_evals,
                        trials=trials,
                        **kwargs)
        except hyperopt.exceptions.AllTrialsFailed:
            best = None
        if upload and not online:
            trials.upload()
        return trials, best
Example #12
    def test_accepting_sparksession(self):
        spark_trials = SparkTrials(
            parallelism=2, spark_session=SparkSession.builder.getOrCreate())

        fmin(fn=lambda x: x + 1,
             space=hp.uniform('x', 5, 8),
             algo=anneal.suggest,
             max_evals=2,
             trials=spark_trials)
Example #13
    def test_exception_when_spark_not_available(self):
        import hyperopt
        orig_have_spark = hyperopt.spark._have_spark
        hyperopt.spark._have_spark = False
        try:
            with self.assertRaisesRegexp(Exception, "cannot import pyspark"):
                SparkTrials(parallelism=4)
        finally:
            hyperopt.spark._have_spark = orig_have_spark
Example #14
    def test_trial_run_info(self):
        spark_trials = SparkTrials(parallelism=4)

        with patch_logger('hyperopt-spark') as output:
            fmin(fn=fn_succeed_within_range,
                 space=hp.uniform('x', -5, 5),
                 algo=anneal.suggest,
                 max_evals=8,
                 return_argmin=False,
                 trials=spark_trials)
            self.check_run_status(spark_trials,
                                  output,
                                  num_total=8,
                                  num_success=7,
                                  num_failure=1)

        expected_result = {'loss': 1.0, 'status': 'ok'}
        for trial in spark_trials._dynamic_trials:
            if trial['state'] == base.JOB_STATE_DONE:
                self.assertEqual(
                    trial['result'], expected_result,
                    "Wrong result has been saved: Expected {e} but got {r}.".
                    format(e=expected_result, r=trial['result']))
            elif trial['state'] == base.JOB_STATE_ERROR:
                err_message = trial['misc']['error'][1]
                self.assertIn(
                    "RuntimeError", err_message,
                    "Missing {e} in {r}.".format(e="RuntimeError",
                                                 r=err_message))

        num_success = spark_trials.count_by_state_unsynced(base.JOB_STATE_DONE)
        self.assertEqual(
            num_success, 7,
            "Wrong number of successful trial runs: Expected {e} but got {r}.".
            format(e=7, r=num_success))
        num_failure = spark_trials.count_by_state_unsynced(
            base.JOB_STATE_ERROR)
        self.assertEqual(
            num_failure, 1,
            "Wrong number of failed trial runs: Expected {e} but got {r}.".
            format(e=1, r=num_failure))
Example #15
def train(df, experiment_name, run_name):
    mlflow.set_experiment(experiment_name)

    data = df.toPandas()
    X_train, X_test, y_train, y_test = train_test_split(data.drop(["quality"], axis=1), data[["quality"]].values.ravel(), random_state=42)

    search_space = {
        'n_estimators': hp.uniform('n_estimators', 10, 100),
        'min_samples_leaf': hp.uniform('min_samples_leaf', 1, 20),
        'max_depth': hp.uniform('max_depth', 2, 10),
    }

    spark_trials = SparkTrials(parallelism=4)

    with mlflow.start_run(run_name=run_name):
        fmin(
            fn=evaluate_hyperparams_wrapper(X_train, X_test, y_train, y_test),
            space=search_space,
            algo=tpe.suggest,
            max_evals=10,
            trials=spark_trials,
        )
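
The example above does not include evaluate_hyperparams_wrapper. A minimal sketch of how such a wrapper is commonly written (a closure over the train/test split that returns a hyperopt objective) follows; the random-forest model, the MSE metric, and the int() casts are assumptions rather than code from the original project. The casts matter because hp.uniform samples floats.

from hyperopt import STATUS_OK
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


def evaluate_hyperparams_wrapper(X_train, X_test, y_train, y_test):
    # Returns the objective that fmin calls with a sampled params dict.
    def evaluate(params):
        model = RandomForestRegressor(
            n_estimators=int(params["n_estimators"]),  # hp.uniform yields floats
            min_samples_leaf=int(params["min_samples_leaf"]),
            max_depth=int(params["max_depth"]),
            random_state=42,
        )
        model.fit(X_train, y_train)
        # fmin minimizes, so report the held-out MSE as the loss.
        loss = mean_squared_error(y_test, model.predict(X_test))
        return {"loss": loss, "status": STATUS_OK}

    return evaluate

Because the inner function closes over the pandas splits, SparkTrials serializes them together with the objective and ships a copy to the executors for each trial, so this pattern works best when the data comfortably fits in memory.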
Example #16
    def test_parallelism_arg(self):
        # Computing max_num_concurrent_tasks
        max_num_concurrent_tasks = self.sc._jsc.sc().maxNumConcurrentTasks()
        self.assertEqual(
            max_num_concurrent_tasks,
            BaseSparkContext.NUM_SPARK_EXECUTORS,
            "max_num_concurrent_tasks ({c}) did not equal "
            "BaseSparkContext.NUM_SPARK_EXECUTORS ({e})".format(
                c=max_num_concurrent_tasks, e=BaseSparkContext.NUM_SPARK_EXECUTORS
            ),
        )

        max_num_concurrent_tasks = 4
        # Given invalidly small parallelism
        with patch_logger("hyperopt-spark") as output:
            parallelism = SparkTrials._decide_parallelism(max_num_concurrent_tasks, -1)
            self.assertEqual(
                parallelism,
                max_num_concurrent_tasks,
                "Failed to default parallelism ({p}) to max_num_concurrent_tasks"
                " ({e})".format(p=parallelism, e=max_num_concurrent_tasks),
            )
            log_output = output.getvalue().strip()
            self.assertIn(
                "invalid value (-1)",
                log_output,
                """Invalid parallelism value -1 missing from log: {log_output}""".format(
                    log_output=log_output
                ),
            )
            self.assertIn(
                "max_num_concurrent_tasks ({c})".format(c=max_num_concurrent_tasks),
                log_output,
                """max_num_concurrent_tasks value missing from log: {log_output}""".format(
                    log_output=log_output
                ),
            )

        # Given invalidly large parallelism
        with patch_logger("hyperopt-spark") as output:
            parallelism = SparkTrials._decide_parallelism(
                max_num_concurrent_tasks, max_num_concurrent_tasks + 1
            )
            self.assertEqual(
                parallelism,
                max_num_concurrent_tasks,
                "Failed to limit parallelism ({p}) to max_num_concurrent_tasks"
                " ({e})".format(p=parallelism, e=max_num_concurrent_tasks),
            )
            log_output = output.getvalue().strip()
            self.assertIn(
                "parallelism ({p}) is greater".format(p=max_num_concurrent_tasks + 1),
                log_output,
                """User-specified parallelism ({p}) missing from log: {log_output}""".format(
                    p=max_num_concurrent_tasks + 1, log_output=log_output
                ),
            )
            self.assertIn(
                "max_num_concurrent_tasks ({c})".format(c=max_num_concurrent_tasks),
                log_output,
                """max_num_concurrent_tasks value missing from log: {log_output}""".format(
                    log_output=log_output
                ),
            )

        # Given valid parallelism
        parallelism = SparkTrials._decide_parallelism(max_num_concurrent_tasks, None)
        self.assertEqual(
            parallelism,
            max_num_concurrent_tasks,
            "The default parallelism ({p}) did not equal max_num_concurrent_tasks"
            " ({e})".format(p=parallelism, e=max_num_concurrent_tasks),
        )

        # Given invalid parallelism relative to hard cap
        with patch_logger("hyperopt-spark") as output:
            parallelism = SparkTrials._decide_parallelism(
                max_num_concurrent_tasks=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED + 1,
                parallelism=None,
            )
            self.assertEqual(
                parallelism,
                SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED,
                "Failed to limit parallelism ({p}) to MAX_CONCURRENT_JOBS_ALLOWED ({e})".format(
                    p=parallelism, e=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED
                ),
            )
            log_output = output.getvalue().strip()
            self.assertIn(
                "SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED ({c})".format(
                    c=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED
                ),
                log_output,
                """MAX_CONCURRENT_JOBS_ALLOWED value missing from log: {log_output}""".format(
                    log_output=log_output
                ),
            )
Example #17
# COMMAND ----------

# MAGIC %md
# MAGIC ### Hyperparameter Tuning
# MAGIC Use Hyperopt with SparkTrials to run distributed hyperparameter tuning across workers in parallel

# COMMAND ----------

spark.conf.set("spark.databricks.mlflow.trackHyperopt.enabled", False)

# COMMAND ----------

from functools import partial
from hyperopt import SparkTrials, hp, fmin, tpe, STATUS_FAIL, STATUS_OK

spark_trials = SparkTrials()
hyperopt_algo = tpe.suggest

n_components_range = np.arange(4, 12, 1, dtype=int)
max_depth_range = np.arange(3, 8, 1, dtype=int)
learning_rate_range = np.arange(0.01, 0.15, 0.01)
n_estimators_range = np.arange(500, 2000, 1, dtype=int)

params = {
    'pca_params': {
        'n_components': hp.choice('n_components', n_components_range)
    },
    'algo_params': {
        'max_depth': hp.choice('max_depth', max_depth_range),
        'learning_rate': hp.choice('learning_rate', learning_rate_range),
        'n_estimators': hp.choice('n_estimators', n_estimators_range),
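
For reference, a minimal self-contained version of this pattern looks roughly as follows; objective_fn is a hypothetical stand-in for the notebook's training routine, and the ranges simply echo the ones defined above.

import numpy as np
from hyperopt import STATUS_OK, SparkTrials, fmin, hp, tpe

search_space = {
    "max_depth": hp.choice("max_depth", np.arange(3, 8, dtype=int)),
    "learning_rate": hp.uniform("learning_rate", 0.01, 0.15),
}


def objective_fn(params):
    # Stand-in loss; a real objective would fit and score a model here.
    loss = (params["learning_rate"] - 0.05) ** 2 + 0.01 * int(params["max_depth"])
    return {"loss": loss, "status": STATUS_OK}


best_indices = fmin(
    fn=objective_fn,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=SparkTrials(parallelism=4),  # requires an active SparkSession
)
# For hp.choice dimensions, fmin returns indices into the choice sequence,
# not the chosen values themselves.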
Example #18
    def test_timeout_with_job_cancellation(self):
        if not self.sparkSupportsJobCancelling():
            print(
                "Skipping timeout test since this Apache PySpark version does not "
                "support cancelling jobs by job group ID.")
            return

        timeout = 2
        spark_trials = SparkTrials(parallelism=4, timeout=timeout)

        def fn(x):
            if x < 0:
                time.sleep(timeout + 20)
                raise Exception("Task should have been cancelled")
            else:
                time.sleep(1)
            return x

        # Test 1 cancelled trial.  Examine logs.
        with patch_logger("hyperopt-spark", logging.DEBUG) as output:
            fmin(
                fn=fn,
                space=hp.uniform("x", -2, 0),
                algo=anneal.suggest,
                max_evals=1,
                trials=spark_trials,
                max_queue_len=1,
                show_progressbar=False,
                return_argmin=False,
                rstate=np.random.RandomState(4),
            )
            log_output = output.getvalue().strip()

            self.assertTrue(spark_trials._fmin_cancelled)
            self.assertEqual(spark_trials._fmin_cancelled_reason,
                             "fmin run timeout")
            self.assertEqual(spark_trials.count_cancelled_trials(), 1)
            self.assertIn(
                "Cancelling all running jobs",
                log_output,
                """ "Cancelling all running jobs" missing from log: {log_output}"""
                .format(log_output=log_output),
            )
            self.assertIn(
                "trial task 0 cancelled",
                log_output,
                """ "trial task 0 cancelled" missing from log: {log_output}""".
                format(log_output=log_output),
            )
            self.assertNotIn(
                "Task should have been cancelled",
                log_output,
                """ "Task should have been cancelled" should not in log:
                              {log_output}""".format(log_output=log_output),
            )
            self.assert_task_failed(log_output, 0)

        # Test mix of successful and cancelled trials.
        spark_trials = SparkTrials(parallelism=4, timeout=4)
        fmin(
            fn=fn,
            space=hp.uniform("x", -0.25, 5),
            algo=anneal.suggest,
            max_evals=6,
            trials=spark_trials,
            max_queue_len=1,
            show_progressbar=False,
            return_argmin=True,
            rstate=np.random.RandomState(4),
        )

        time.sleep(2)
        self.assertTrue(spark_trials._fmin_cancelled)
        self.assertEqual(spark_trials._fmin_cancelled_reason,
                         "fmin run timeout")

        # There are 2 finished trials, 1 cancelled running trial and 1 cancelled
        # new trial. We do not need to check the new trial since it is not started yet.
        self.assertGreaterEqual(
            spark_trials.count_successful_trials(),
            1,
            "Expected at least 1 successful trial but found none.",
        )
        self.assertGreaterEqual(
            spark_trials.count_cancelled_trials(),
            1,
            "Expected at least 1 cancelled trial but found none.",
        )
Example #19
        try:
            class_att = AttentionTFIDFClassifier(**params,
                                                 nepochs=25,
                                                 _verbose=False)
            print(class_att)
            class_att.fit(fold.X_train, fold.y_train, fold.X_val, fold.y_val)
            return {
                "loss": class_att._loss,
                "status": STATUS_OK,
                "model": class_att.to('cpu')
            }
        except Exception:
            return {"status": STATUS_FAIL}

    #trials = Trials()
    trials = SparkTrials(parallelism=cpu_count())

    best = fmin(fn=hyperparameter_tuning_try,
                space=space,
                algo=tpe.suggest,
                max_evals=15 * cpu_count(),
                trials=trials)

    print("Best: {}".format(best))
    class_att = trials.best_trial['result']['model']
    y_pred = class_att.predict(fold.X_test)

    with open(path.join(path_result, f'fold{i}'), 'w') as file_writer:
        file_writer.write(';'.join(map(str, y_pred)))

    print(
Example #20
    def test_parallelism_arg(self):
        # Computing max_num_concurrent_tasks
        max_num_concurrent_tasks = self.sc._jsc.sc().maxNumConcurrentTasks()
        self.assertEqual(
            max_num_concurrent_tasks,
            BaseSparkContext.NUM_SPARK_EXECUTORS,
            "max_num_concurrent_tasks ({c}) did not equal "
            "BaseSparkContext.NUM_SPARK_EXECUTORS ({e})".format(
                c=max_num_concurrent_tasks,
                e=BaseSparkContext.NUM_SPARK_EXECUTORS),
        )

        for spark_default_parallelism, max_num_concurrent_tasks in [(2, 4),
                                                                    (2, 0)]:
            default_parallelism = max(spark_default_parallelism,
                                      max_num_concurrent_tasks)

            # Test requested_parallelism is None or negative values.
            for requested_parallelism in [None, -1]:
                with patch_logger("hyperopt-spark") as output:
                    parallelism = SparkTrials._decide_parallelism(
                        requested_parallelism=requested_parallelism,
                        spark_default_parallelism=spark_default_parallelism,
                        max_num_concurrent_tasks=max_num_concurrent_tasks,
                    )
                    self.assertEqual(
                        parallelism,
                        default_parallelism,
                        "Failed to set parallelism to be default parallelism ({p})"
                        " ({e})".format(p=parallelism, e=default_parallelism),
                    )
                    log_output = output.getvalue().strip()
                    self.assertIn(
                        "Because the requested parallelism was None or a non-positive value, "
                        "parallelism will be set to ({d})".format(
                            d=default_parallelism),
                        log_output,
                        """set to default parallelism missing from log: {log_output}"""
                        .format(log_output=log_output),
                    )

            # Test requested_parallelism which will trigger spark executor dynamic allocation.
            with patch_logger("hyperopt-spark") as output:
                parallelism = SparkTrials._decide_parallelism(
                    requested_parallelism=max_num_concurrent_tasks + 1,
                    spark_default_parallelism=spark_default_parallelism,
                    max_num_concurrent_tasks=max_num_concurrent_tasks,
                )
                self.assertEqual(
                    parallelism,
                    max_num_concurrent_tasks + 1,
                    "Expect parallelism to be ({e}) but get ({p})".format(
                        p=parallelism, e=max_num_concurrent_tasks + 1),
                )
                log_output = output.getvalue().strip()
                self.assertIn(
                    "Parallelism ({p}) is greater".format(
                        p=max_num_concurrent_tasks + 1),
                    log_output,
                    """Parallelism ({p}) missing from log: {log_output}""".
                    format(p=max_num_concurrent_tasks + 1,
                           log_output=log_output),
                )

            # Test requested_parallelism exceeds hard cap
            with patch_logger("hyperopt-spark") as output:
                parallelism = SparkTrials._decide_parallelism(
                    requested_parallelism=SparkTrials.
                    MAX_CONCURRENT_JOBS_ALLOWED + 1,
                    spark_default_parallelism=spark_default_parallelism,
                    max_num_concurrent_tasks=max_num_concurrent_tasks,
                )
                self.assertEqual(
                    parallelism,
                    SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED,
                    "Failed to limit parallelism ({p}) to MAX_CONCURRENT_JOBS_ALLOWED ({e})"
                    .format(p=parallelism,
                            e=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED),
                )
                log_output = output.getvalue().strip()
                self.assertIn(
                    "SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED ({c})".format(
                        c=SparkTrials.MAX_CONCURRENT_JOBS_ALLOWED),
                    log_output,
                    """MAX_CONCURRENT_JOBS_ALLOWED value missing from log: {log_output}"""
                    .format(log_output=log_output),
                )
Example #21
    def test_quadratic1_tpe(self):
        # TODO: Speed this up or remove it since it is slow (1 minute on laptop)
        spark_trials = SparkTrials(parallelism=4)
        test_quadratic1_tpe(spark_trials)
Example #22
# COMMAND ----------

single_node_epochs = 20
num_classes = 10

#search space for hyperparameter tuning
space = {
    'stride': hp.quniform('stride', 2, 4, 1),
    'batch_size': hp.uniform('batch_size', 32, 128),
    'learning_rate': hp.uniform('learning_rate', -10, 0),
    'optimizer': hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop'])
}
dbutils.fs.rm('/mnt/ved-demo/mlmodels/mnist', True)
dbutils.fs.mkdirs('/mnt/ved-demo/mlmodels/mnist')

spark_trials = SparkTrials(parallelism=parallelism)
with mlflow.start_run():
    argmin = fmin(fn=runCNN,
                  space=space,
                  algo=tpe.suggest,
                  max_evals=32,
                  show_progressbar=False,
                  trials=spark_trials)

# install keras separately

# COMMAND ----------

# MAGIC %md
# MAGIC #### Return the set of hyperparams that minimized the loss
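
The cell that does this is not shown in the snippet; one common way, sketched here with the space and argmin names from the code above, is to resolve the indices that fmin returns for hp.choice dimensions back into concrete values with hyperopt.space_eval:

from hyperopt import space_eval

# argmin holds raw values for continuous dimensions and indices for
# hp.choice dimensions; space_eval maps it back to the actual settings.
best_hyperparams = space_eval(space, argmin)
print(best_hyperparams)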
Example #23
}

# COMMAND ----------

from math import factorial
from hyperopt import fmin, tpe, STATUS_OK, SparkTrials
import numpy as np

# set the parallelism of the search
cluster_nodes = 3
node_cores = 4
num_parallelism = min((cluster_nodes * node_cores), factorial(len(params)))

# Creating a parent run
with mlflow.start_run():
    num_evals = 100  #max models to evaluate
    trials = SparkTrials(num_parallelism)
    best_hyperparam = fmin(fn=objective_function,
                           space=params,
                           algo=tpe.suggest,
                           max_evals=num_evals,
                           trials=trials)

    # Log param and metric for the best model
    for name, value in best_hyperparam.items():
        mlflow.log_param(name, value)

    mlflow.log_metric("loss", trials.best_trial["result"]["loss"])

# COMMAND ----------
Example #24
                                                 validation_steps, device)

    return val_loss


# COMMAND ----------

# DBTITLE 1,Hyperopt
BATCH_SIZE = 100
NUM_EPOCHS = 1


def train_fn(lr):
    loss = train_and_evaluate(lr)
    return {'loss': loss, 'status': STATUS_OK}


search_space = hp.loguniform('lr', -10, -4)

argmin = fmin(fn=train_fn,
              space=search_space,
              algo=tpe.suggest,
              max_evals=1,
              trials=SparkTrials(parallelism=8))

# COMMAND ----------

argmin

# COMMAND ----------
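
As a small follow-up sketch, assuming the train_and_evaluate helper from the cells above, argmin can be used directly to retrain at the chosen learning rate:

# The search space is a single hp.loguniform('lr', -10, -4) dimension,
# so argmin is a dict like {'lr': <learning rate>}.
best_lr = argmin["lr"]
final_loss = train_and_evaluate(best_lr)
print("best lr:", best_lr, "validation loss:", final_loss)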