Example #1
from unittest import mock

from tf_yarn import Experiment, TaskSpec, run_on_yarn


def test_retry_run_on_yarn(nb_retries, nb_failures):
    cpt = 0

    def fail(*args, **kwargs):
        # Raise on the first nb_failures calls, then succeed.
        nonlocal cpt
        if cpt < nb_failures:
            cpt += 1
            raise Exception("")

    with mock.patch('tf_yarn.client._setup_pyenvs'), \
            mock.patch('tf_yarn.client._setup_skein_cluster') as mock_setup_skein_cluster, \
            mock.patch('tf_yarn.client._run_on_cluster') as mock_run_on_cluster:
        mock_run_on_cluster.side_effect = fail

        gb = 2**10  # 1 GiB expressed in MiB

        try:
            run_on_yarn(
                "path/to/env", lambda: Experiment(None, None, None),
                task_specs={
                    "chief": TaskSpec(memory=16 * gb, vcores=16),
                    "worker": TaskSpec(memory=16 * gb, vcores=16, instances=1),
                    "ps": TaskSpec(memory=16 * gb, vcores=16, instances=1)
                },
                nb_retries=nb_retries
            )
        except Exception:
            pass

        # One initial attempt plus one retry per failure, capped by nb_retries.
        nb_calls = min(nb_retries, nb_failures) + 1
        assert mock_run_on_cluster.call_count == nb_calls
        assert mock_setup_skein_cluster.call_count == nb_calls
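
The test's signature takes nb_retries and nb_failures as arguments, which implies pytest parametrization. A minimal sketch of how the pairs might be supplied; the concrete values below are assumptions, not taken from the source:

import pytest

# Hypothetical (nb_retries, nb_failures) pairs covering both sides of
# min(nb_retries, nb_failures); not from the original source.
@pytest.mark.parametrize("nb_retries,nb_failures", [
    (0, 0),  # succeeds on the first attempt
    (1, 0),  # retry budget unused
    (1, 2),  # failures exhaust the retry budget
    (3, 2),  # retries outlast the failures
])
def test_retry_run_on_yarn(nb_retries, nb_failures):
    ...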
Example #2
import tensorflow as tf

from tf_yarn import Experiment


def _experiment_fn(model_dir):
    print(f"create experiment with model_dir={model_dir}")

    def train_fn():
        # Placeholder input_fn for the TrainSpec.
        return None

    def eval_fn():
        # Placeholder input_fn for the EvalSpec.
        return None

    return Experiment(
        tf.estimator.LinearClassifier(feature_columns=[], model_dir=model_dir),
        tf.estimator.TrainSpec(train_fn),
        tf.estimator.EvalSpec(eval_fn))
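
For context, a minimal sketch of handing such an experiment function to run_on_yarn (the API shown in Example #1); the pyenv path, model_dir, and task sizes here are placeholder assumptions:

import functools

from tf_yarn import TaskSpec, run_on_yarn

run_on_yarn(
    "path/to/env",  # placeholder pyenv archive path
    # Bind the model_dir so the factory matches the zero-argument
    # experiment function signature used in Example #1.
    functools.partial(_experiment_fn, "hdfs://path/to/model_dir"),
    task_specs={
        "chief": TaskSpec(memory=4 * 2**10, vcores=4),
        "worker": TaskSpec(memory=4 * 2**10, vcores=4, instances=1),
        "ps": TaskSpec(memory=4 * 2**10, vcores=4, instances=1),
    },
)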
Example #3
from tf_yarn import Experiment
from tf_yarn.evaluator_metrics import EvalMonitorHook  # import path assumed


def add_monitor_to_experiment(experiment: Experiment) -> Experiment:
    # Prepend an EvalMonitorHook so the evaluator's progress is monitored.
    monitored_eval_spec = experiment.eval_spec._replace(
        hooks=(EvalMonitorHook(), *experiment.eval_spec.hooks))
    return experiment._replace(eval_spec=monitored_eval_spec)
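
A short usage sketch, assuming the helper is applied to the factory from Example #2 before tf_yarn consumes the experiment; this composition is an assumption about intended use:

def monitored_experiment_fn():
    # Hypothetical wrapper: build the experiment, then attach the
    # evaluation monitor before returning it.
    return add_monitor_to_experiment(_experiment_fn("hdfs://path/to/model_dir"))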