Example #1
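The examples below are test methods excerpted from hyperopt's SparkTrials test
suite; they rely on a surrounding TestCase class that provides helpers such as
assert_task_succeeded. A minimal sketch of the imports they need follows (the
module path for the patch_logger test helper is an assumption):

import logging
import time

import numpy as np

from hyperopt import SparkTrials, anneal, fmin, hp
from hyperopt.tests.test_utils import patch_logger  # assumed helper location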
    def test_timeout_without_job_cancellation(self):
        timeout = 4
        spark_trials = SparkTrials(parallelism=1, timeout=timeout)
        # Force the code path for PySpark versions that cannot cancel jobs
        # by job group ID.
        spark_trials._spark_supports_job_cancelling = False

        def fn(x):
            # Short enough that some trials finish before the timeout.
            time.sleep(0.5)
            return x

        with patch_logger('hyperopt-spark', logging.DEBUG) as output:
            fmin(fn=fn,
                 space=hp.uniform('x', -1, 1),
                 algo=anneal.suggest,
                 max_evals=10,
                 trials=spark_trials,
                 max_queue_len=1,
                 show_progressbar=False,
                 return_argmin=False)
            log_output = output.getvalue().strip()

            self.assertTrue(spark_trials._fmin_cancelled)
            self.assertEqual(spark_trials._fmin_cancelled_reason,
                             "fmin run timeout")
            self.assertGreater(spark_trials.count_successful_trials(), 0)
            self.assertGreater(spark_trials.count_cancelled_trials(), 0)
            self.assertIn(
                "fmin is cancelled, so new trials will not be launched",
                log_output,
                '"fmin is cancelled, so new trials will not be launched" '
                "missing from log: {log_output}".format(log_output=log_output))
            self.assertIn(
                "SparkTrials will block", log_output,
                '"SparkTrials will block" missing from log: '
                "{log_output}".format(log_output=log_output))
            self.assert_task_succeeded(log_output, 0)
Example #2
    def test_timeout_with_job_cancellation(self):
        if not self.sparkSupportsJobCancelling():
            print(
                "Skipping timeout test since this Apache PySpark version does not "
                "support cancelling jobs by job group ID.")
            return

        timeout = 2
        spark_trials = SparkTrials(parallelism=4, timeout=timeout)

        def fn(x):
            if x < 0:
                # Sleep well past the timeout; if this task is not cancelled,
                # the exception below will appear in the logs.
                time.sleep(timeout + 20)
                raise Exception("Task should have been cancelled")
            else:
                time.sleep(1)
            return x

        # Test 1 cancelled trial.  Examine logs.
        with patch_logger("hyperopt-spark", logging.DEBUG) as output:
            fmin(
                fn=fn,
                space=hp.uniform("x", -2, 0),
                algo=anneal.suggest,
                max_evals=1,
                trials=spark_trials,
                max_queue_len=1,
                show_progressbar=False,
                return_argmin=False,
                rstate=np.random.RandomState(4),
            )
            log_output = output.getvalue().strip()

            self.assertTrue(spark_trials._fmin_cancelled)
            self.assertEqual(spark_trials._fmin_cancelled_reason,
                             "fmin run timeout")
            self.assertEqual(spark_trials.count_cancelled_trials(), 1)
            self.assertIn(
                "Cancelling all running jobs",
                log_output,
                '"Cancelling all running jobs" missing from log: '
                "{log_output}".format(log_output=log_output),
            )
            self.assertIn(
                "trial task 0 cancelled",
                log_output,
                '"trial task 0 cancelled" missing from log: '
                "{log_output}".format(log_output=log_output),
            )
            self.assertNotIn(
                "Task should have been cancelled",
                log_output,
                '"Task should have been cancelled" should not be in log: '
                "{log_output}".format(log_output=log_output),
            )
            self.assert_task_failed(log_output, 0)

        # Test mix of successful and cancelled trials.
        spark_trials = SparkTrials(parallelism=4, timeout=4)
        fmin(
            fn=fn,
            space=hp.uniform("x", -0.25, 5),
            algo=anneal.suggest,
            max_evals=6,
            trials=spark_trials,
            max_queue_len=1,
            show_progressbar=False,
            return_argmin=True,
            rstate=np.random.RandomState(4),
        )

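        # Give cancellation a moment to propagate before inspecting trial states.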
        time.sleep(2)
        self.assertTrue(spark_trials._fmin_cancelled)
        self.assertEqual(spark_trials._fmin_cancelled_reason,
                         "fmin run timeout")

        # We expect 2 finished trials, 1 cancelled running trial, and 1 cancelled
        # new trial. The new trial need not be checked since it never started.
        self.assertGreaterEqual(
            spark_trials.count_successful_trials(),
            1,
            "Expected at least 1 successful trial but found none.",
        )
        self.assertGreaterEqual(
            spark_trials.count_cancelled_trials(),
            1,
            "Expected at least 1 cancelled trial but found none.",
        )
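
For reference, a minimal non-test sketch of the same timeout behavior with
SparkTrials (the objective function and parameter values here are illustrative
assumptions, and a working PySpark installation is required):

import time

from hyperopt import SparkTrials, anneal, fmin, hp

def objective(x):
    time.sleep(1)  # simulate a slow evaluation
    return x ** 2

# Stop launching new trials once 5 seconds have elapsed. Trials still running
# at the deadline are cancelled when the PySpark version supports job-group
# cancellation; otherwise fmin blocks until they finish.
trials = SparkTrials(parallelism=2, timeout=5)
best = fmin(
    fn=objective,
    space=hp.uniform("x", -1, 1),
    algo=anneal.suggest,
    max_evals=20,
    trials=trials,
)
print(best)  # best point found, e.g. {'x': ...}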