Example #1
0
    def test_default_is_chief_without_tf_config_or_job_name(self):
        """Legacy default: with only a task index given, task 0 alone is chief.

        Neither is_chief nor job_name is passed and this test sets no
        TF_CONFIG, so is_chief has to be derived from the task index.
        """
        first_task_config = run_config.RunConfig(task=0)
        self.assertTrue(first_task_config.is_chief)

        second_task_config = run_config.RunConfig(task=1)
        self.assertFalse(second_task_config.is_chief)
Example #2
0
    def test_bad_is_chief_combinations_raise(self):
        """Contradictory is_chief/task/job_name arguments raise ValueError."""
        # A non-zero task index flagged as chief is rejected.
        with self.assertRaisesRegexp(
                ValueError, "Task is 1, but only task 0 may be chief"):
            run_config.RunConfig(is_chief=True, task=1)

        # A "ps" job flagged as chief is rejected.
        ps_error = "job_name is \'ps\', but only masters or workers may be chiefs"
        with self.assertRaisesRegexp(ValueError, ps_error):
            run_config.RunConfig(is_chief=True, task=0, job_name="ps")

        # Task 0 of the "master" job may not opt out of being chief.
        master_error = "Master task 0 must be chief"
        with self.assertRaisesRegexp(ValueError, master_error):
            run_config.RunConfig(is_chief=False, task=0, job_name="master")
Example #3
0
    def test_default_is_chief_without_tf_config_but_has_job_name(self):
        """With a job_name but no is_chief, only worker task 0 is chief."""
        # Each row: (job_name, task, whether is_chief should be truthy).
        expectations = (
            ("worker", 0, True),
            ("worker", 1, False),
            ("ps", 0, False),
            ("ps", 1, False),
        )
        for job_name, task, should_be_chief in expectations:
            config = run_config.RunConfig(job_name=job_name, task=task)
            if should_be_chief:
                self.assertTrue(config.is_chief)
            else:
                self.assertFalse(config.is_chief)
Example #4
0
    def test_explicitly_specified_values(self):
        """Checks RunConfig fields when TF_CONFIG and explicit masters are given.

        TF_CONFIG names a cluster with one PS address and a custom job of
        three addresses, and identifies this task as WORKER index 2. The
        master and evaluation_master are passed straight to the constructor.
        """
        cluster = {
            run_config_lib.TaskType.PS: ["localhost:9990"],
            "my_job_name": ["localhost:9991", "localhost:9992", "localhost:0"]
        }
        task = {"type": run_config_lib.TaskType.WORKER, "index": 2}
        fake_env = {
            "TF_CONFIG": json.dumps({"cluster": cluster, "task": task})
        }

        # TF_CONFIG only needs to be present while the RunConfig is built.
        with patch.dict("os.environ", fake_env):
            config = run_config.RunConfig(
                master="localhost:0", evaluation_master="localhost:9991")

        self.assertEqual(config.master, "localhost:0")
        self.assertEqual(config.task_id, 2)
        self.assertEqual(config.num_ps_replicas, 1)
        self.assertEqual(config.num_worker_replicas, 0)
        self.assertEqual(config.cluster_spec, server_lib.ClusterSpec(cluster))
        self.assertEqual(config.task_type, run_config_lib.TaskType.WORKER)
        self.assertFalse(config.is_chief)
        self.assertEqual(config.evaluation_master, "localhost:9991")
 def test_fail_job_name_with_no_default_schedule(self):
   """learn_runner.run raises ValueError for a job name with no schedule."""
   unscheduled_config = run_config.RunConfig(
       job_name="foo_has_no_default_schedule",
       cluster_spec=build_distributed_cluster_spec())

   def create_experiment_fn(output_dir):
     # output_dir is ignored; the experiment only carries the config.
     return TestExperiment(config=unscheduled_config)

   with self.assertRaisesRegexp(ValueError, "No default schedule"):
     learn_runner.run(create_experiment_fn, "/tmp")
 def test_no_schedule_and_non_distributed_runs_local_run(self):
   """Expects learn_runner.run to return "local_run" for a local cluster."""
   local_config = run_config.RunConfig(
       cluster_spec=build_non_distributed_cluster_spec())

   def experiment_fn(output_dir):
     # output_dir is ignored; the experiment only carries the config.
     return TestExperiment(config=local_config)

   outcome = learn_runner.run(experiment_fn, output_dir="/tmp")
   self.assertEqual("local_run", outcome)
 def test_schedule_from_config_runs_train_on_worker(self):
   """Expects learn_runner.run to return "train" for a worker job."""
   worker_config = run_config.RunConfig(
       job_name="worker", cluster_spec=build_distributed_cluster_spec())

   def experiment_fn(output_dir):
     # output_dir is ignored; the experiment only carries the config.
     return TestExperiment(config=worker_config)

   outcome = learn_runner.run(experiment_fn, output_dir="/tmp")
   self.assertEqual("train", outcome)
 def test_no_schedule_and_non_distributed_runs_train_and_evaluate(self):
   """Expects "train_and_evaluate" when TF_CONFIG holds a local cluster."""
   fake_env = {
       "TF_CONFIG": json.dumps(
           {"cluster": build_non_distributed_cluster_spec()})
   }
   # Both the RunConfig construction and the run happen under the patch.
   with patch.dict("os.environ", fake_env):
     config = run_config.RunConfig()

     def experiment_fn(output_dir):
       return TestExperiment(config=config)

     outcome = learn_runner.run(experiment_fn, output_dir="/tmp")
     self.assertEqual("train_and_evaluate", outcome)
Example #9
0
 def __init__(self, config=None, max_evals=5):
     """Initialise counters, the run config and a scratch model directory.

     Args:
       config: Optional configuration object. When falsy, a default
         run_config.RunConfig() is created instead.
       max_evals: Value stored as self._max_evals; defaults to 5.
     """
     # All counters start at zero.
     self.eval_count = self.fit_count = self.export_count = 0
     self._max_evals = max_evals
     self.monitors = []
     if not config:
         config = run_config.RunConfig()
     self._config = config
     # Every instance gets its own newly created temporary directory.
     self._model_dir = tempfile.mkdtemp()
Example #10
0
 def test_schedule_from_tf_config(self):
     """Expects learn_runner.run to return "train" for a TF_CONFIG worker.

     TF_CONFIG is set for the duration of the test and then restored to
     its previous state, so the setting cannot leak into other tests.
     """
     previous_tf_config = os.environ.get("TF_CONFIG")
     os.environ["TF_CONFIG"] = json.dumps({"task": {"type": "worker"}})
     try:
         # RunConfig constructor will set job_name from TF_CONFIG.
         config = run_config.RunConfig()
         self.assertEqual(
             "train",
             learn_runner.run(
                 lambda output_dir: TestExperiment(config=config),
                 output_dir="/tmp"))
     finally:
         # Put the environment back exactly as it was found.
         if previous_tf_config is None:
             os.environ.pop("TF_CONFIG", None)
         else:
             os.environ["TF_CONFIG"] = previous_tf_config
Example #11
0
 def test_fail_schedule_from_config_with_no_job_name(self):
     """learn_runner.run raises ValueError when job_name is None."""
     nameless_config = run_config.RunConfig(job_name=None)

     def experiment_fn(output_dir):
         # output_dir is ignored; the experiment only carries the config.
         return TestExperiment(config=nameless_config)

     with self.assertRaisesRegexp(ValueError, "Must specify a schedule"):
         learn_runner.run(experiment_fn, output_dir="/tmp")
Example #12
0
 def test_defaults_with_no_tf_config(self):
     """A RunConfig built with no arguments exposes the expected defaults."""
     config = run_config.RunConfig()
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(config.master, "")
     self.assertEqual(config.task, 0)
     self.assertEqual(config.num_ps_replicas, 0)
     self.assertIsNone(config.cluster_spec)
     self.assertIsNone(config.job_name)
     self.assertIsNone(config.is_chief)
Example #13
0
 def test_invalid_job_name_raises(self):
     """A job_name absent from the cluster spec raises ValueError."""
     # The cluster only defines the "ps" and "worker" jobs.
     jobs = {
         "ps": ["host1:1", "host2:2"],
         "worker": ["host3:3", "host4:4", "host5:5"]
     }
     spec = tf.train.ClusterSpec(jobs)
     with self.assertRaisesRegexp(
             ValueError, "not_in_cluster_spec is not a valid task"):
         run_config.RunConfig(
             cluster_spec=spec, job_name="not_in_cluster_spec")
Example #14
0
 def test_defaults_with_no_tf_config(self):
     """A RunConfig built with no arguments exposes the expected defaults."""
     defaults = run_config.RunConfig()

     # Addressing and cluster layout.
     self.assertEqual(defaults.master, "")
     self.assertEqual(defaults.task_id, 0)
     self.assertEqual(defaults.num_ps_replicas, 0)
     self.assertEqual(defaults.cluster_spec, {})

     # Role of this task.
     self.assertIsNone(defaults.task_type)
     self.assertTrue(defaults.is_chief)

     self.assertEqual(defaults.evaluation_master, "")
Example #15
0
 def test_no_job_name_produces_empty_master(self):
     """With a cluster_spec but no job_name, config.master is ""."""
     cluster_spec = tf.train.ClusterSpec({
         "ps": ["host1:1", "host2:2"],
         "worker": ["host3:3", "host4:4", "host5:5"]
     })
     # NB: job_name is omitted; better to omit than explicitly set to None
     # as this better mimics client behavior.
     config = run_config.RunConfig(cluster_spec=cluster_spec)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(config.master, "")
Example #16
0
 def test_fail_schedule_from_config_with_no_task_type(self):
     """learn_runner.run raises ValueError when TF_CONFIG has no task."""
     # TF_CONFIG carries a cluster but no "task" entry.
     fake_env = {
         "TF_CONFIG": json.dumps(
             {"cluster": build_distributed_cluster_spec()})
     }
     with patch.dict("os.environ", fake_env):
         config = run_config.RunConfig()

         def experiment_fn(output_dir):
             return TestExperiment(config=config)

         with self.assertRaisesRegexp(ValueError,
                                      "Must specify a schedule"):
             learn_runner.run(experiment_fn, output_dir="/tmp")
 def test_schedule_from_tf_config_runs_train_on_worker(self):
   """Expects learn_runner.run to return "train" for a TF_CONFIG worker.

   TF_CONFIG is set for the duration of the test and then restored to
   its previous state, so the setting cannot leak into other tests.
   """
   previous_tf_config = os.environ.get("TF_CONFIG")
   os.environ["TF_CONFIG"] = json.dumps(
       {"cluster": build_distributed_cluster_spec(),
        "task": {"type": tf.contrib.learn.TaskType.WORKER}})
   try:
     # RunConfig constructor will set job_name from TF_CONFIG.
     config = run_config.RunConfig()
     self.assertEqual(
         "train",
         learn_runner.run(lambda output_dir: TestExperiment(config=config),
                          output_dir="/tmp"))
   finally:
     # Put the environment back exactly as it was found.
     if previous_tf_config is None:
       os.environ.pop("TF_CONFIG", None)
     else:
       os.environ["TF_CONFIG"] = previous_tf_config
Example #18
0
    def test_bad_is_chief_combinations_raise(self):
        """Contradictory is_chief/task/job_name arguments raise ValueError."""
        # A non-zero task index flagged as chief is rejected.
        with self.assertRaisesRegexp(
                ValueError, "Task is 1, but only task 0 may be chief"):
            run_config.RunConfig(is_chief=True, task=1)

        # A "ps" job flagged as chief is rejected.
        ps_error = "job_name is \'ps\', but only masters or workers may be chiefs"
        with self.assertRaisesRegexp(ValueError, ps_error):
            run_config.RunConfig(is_chief=True, task=0, job_name="ps")

        # With TF_CONFIG's environment set to "cloud", master task 0 may
        # not opt out of being chief.
        cloud_env = {"TF_CONFIG": json.dumps({"environment": "cloud"})}
        cloud_error = "Master task 0 must be chief for cloud"
        with self.assertRaisesRegexp(ValueError, cloud_error):
            with patch.dict("os.environ", cloud_env):
                run_config.RunConfig(is_chief=False, task=0, job_name="master")

        # Worker task 0 may not opt out of being chief either.
        worker_error = "Worker task 0 must be chief"
        with self.assertRaisesRegexp(ValueError, worker_error):
            run_config.RunConfig(is_chief=False, task=0, job_name="worker")
 def test_schedule_from_config_runs_local_run_on_master(self):
   """Expects learn_runner.run to return "local_run" for a chief master."""
   master_config = run_config.RunConfig(
       job_name="master",
       cluster_spec=build_distributed_cluster_spec(),
       task=0,
       is_chief=True)

   def experiment_fn(output_dir):
     # output_dir is ignored; the experiment only carries the config.
     return TestExperiment(config=master_config)

   outcome = learn_runner.run(experiment_fn, output_dir="/tmp")
   self.assertEqual("local_run", outcome)
    def test_uid(self):
        """uid() is stable across calls and differs after replacing model_dir."""
        base_config = run_config.RunConfig(
            tf_random_seed=RANDOM_SEED, model_dir=TEST_DIR)
        baseline_uid = base_config.uid()

        # Repeated calls on the same config must keep returning the same uid.
        repeat_count = 10
        for _ in range(repeat_count):
            self.assertEqual(baseline_uid, base_config.uid())

        # The config returned by replace(model_dir=...) must report a
        # different uid.
        relocated_config = base_config.replace(model_dir=ANOTHER_TEST_DIR)
        self.assertNotEqual(baseline_uid, relocated_config.uid())
Example #21
0
 def test_num_ps_replicas_and_cluster_spec_are_mutually_exclusive(self):
     """Passing both num_ps_replicas and cluster_spec raises ValueError."""
     jobs = {
         "ps": ["host1:1", "host2:2"],
         "worker": ["host3:3", "host4:4", "host5:5"]
     }
     spec = tf.train.ClusterSpec(jobs)
     with self.assertRaisesRegexp(
             ValueError,
             "Cannot specify both num_ps_replicas and cluster_spec"):
         run_config.RunConfig(num_ps_replicas=2, cluster_spec=spec)
Example #22
0
  def test_is_chief_from_noncloud_tf_config(self):
    """Outside the "cloud" environment, worker 0 is chief and master 0 is not.

    Both TF_CONFIG payloads share one cluster and set the top-level
    "environment" key to "random"; only the task type differs.
    """
    task_types = tf.contrib.learn.TaskType
    cluster = {
        task_types.PS: ["host1:1", "host2:2"],
        task_types.MASTER: ["host3:3"],
        task_types.WORKER: ["host4:4", "host5:5", "host6:6"]
    }

    def config_for(task_type):
      # Build a RunConfig while TF_CONFIG describes task 0 of `task_type`.
      tf_config = {
          "cluster": cluster,
          "task": {"type": task_type, "index": 0},
          "environment": "random"
      }
      with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
        return run_config.RunConfig()

    worker_config = config_for(task_types.WORKER)
    self.assertTrue(worker_config.is_chief)

    # Task 0 of the job named "master" should not be chief here.
    master_config = config_for(task_types.MASTER)
    self.assertFalse(master_config.is_chief)
Example #23
0
 def test_no_task_type_produces_empty_master(self):
     """With a cluster but no task entry in TF_CONFIG, master is ""."""
     cluster = {
         run_config_lib.TaskType.PS: ["host1:1", "host2:2"],
         run_config_lib.TaskType.WORKER:
         ["host3:3", "host4:4", "host5:5"]
     }
     # The "task": {"type": "worker"} entry is deliberately left out.
     fake_env = {"TF_CONFIG": json.dumps({"cluster": cluster})}
     with patch.dict("os.environ", fake_env):
         config = run_config.RunConfig()
         self.assertEqual(config.master, "")
Example #24
0
 def test_train_default_delay(self):
     """train() should take roughly task * 5 seconds of wall-clock time.

     The same config and experiment are reused; only config.task is
     changed between iterations.
     """
     config = run_config.RunConfig()
     estimator = TestEstimator(config)
     experiment_under_test = tf.contrib.learn.Experiment(
         estimator,
         train_input_fn='train_input',
         eval_input_fn='eval_input')
     for task_index in (0, 1, 3):
         expected_seconds = task_index * 5
         began_at = time.time()
         config.task = task_index
         experiment_under_test.train()
         elapsed = time.time() - began_at
         # Real time is measured, so allow half a second of slack.
         self.assertAlmostEqual(elapsed, expected_seconds, delta=0.5)
 def test_fail_task_type_with_no_default_schedule(self):
   """learn_runner.run raises ValueError for a task type with no schedule."""
   task = {"type": "foo_has_no_default_schedule"}
   fake_env = {
       "TF_CONFIG": json.dumps(
           {"cluster": build_distributed_cluster_spec(), "task": task})
   }
   with patch.dict("os.environ", fake_env):
     config = run_config.RunConfig()

     def create_experiment_fn(output_dir):
       # output_dir is ignored; the experiment only carries the config.
       return TestExperiment(config=config)

     with self.assertRaisesRegexp(ValueError, "No default schedule"):
       learn_runner.run(create_experiment_fn, "/tmp")
 def test_schedule_from_tf_config_runs_serve_on_ps(self):
   """Expects learn_runner.run to return "run_std_server" for a PS task."""
   task = {"type": tf.contrib.learn.TaskType.PS}
   fake_env = {
       "TF_CONFIG": json.dumps(
           {"cluster": build_distributed_cluster_spec(), "task": task})
   }
   with patch.dict("os.environ", fake_env):
     config = run_config.RunConfig()

     def experiment_fn(output_dir):
       # output_dir is ignored; the experiment only carries the config.
       return TestExperiment(config=config)

     outcome = learn_runner.run(experiment_fn, output_dir="/tmp")
     self.assertEqual("run_std_server", outcome)
Example #27
0
 def test_schedule_from_tf_config_runs_train_and_evaluate_on_master(self):
     """Expects "train_and_evaluate" when TF_CONFIG names a MASTER task."""
     task = {"type": run_config_lib.TaskType.MASTER}
     fake_env = {
         "TF_CONFIG": json.dumps(
             {"cluster": build_distributed_cluster_spec(), "task": task})
     }
     with patch.dict("os.environ", fake_env):
         config = run_config.RunConfig()

         def experiment_fn(output_dir):
             # output_dir is ignored; the experiment only carries the config.
             return TestExperiment(config=config)

         outcome = learn_runner.run(experiment_fn, output_dir="/tmp")
         self.assertEqual("train_and_evaluate", outcome)
Example #28
0
  def test_train_default_delay(self):
    """train() should account for task_id * 5 on the SheepCounter clock.

    time.time and time.sleep are both replaced by one SheepCounter's
    methods for each estimator, so the check reads that counter's time()
    rather than the real clock.
    """
    for task_id in (0, 1, 3):
      fake_env = {'TF_CONFIG': json.dumps({'task': {'index': task_id}})}
      # TF_CONFIG only needs to be present while the RunConfig is built.
      with test.mock.patch.dict('os.environ', fake_env):
        config = run_config.RunConfig()

      expected_delay = task_id * 5
      for estimator in self._estimators_for_tests(config):
        ex = experiment.Experiment(
            estimator,
            train_input_fn='train_input',
            eval_input_fn='eval_input')

        counter = SheepCounter()
        with test.mock.patch.object(time, 'time', counter.time):
          with test.mock.patch.object(time, 'sleep', counter.sleep):
            ex.train()
            self.assertAlmostEqual(expected_delay, counter.time(), delta=1e-4)
Example #29
0
    def test_train_default_delay(self):
        """train() should take roughly task_id * 5 seconds of wall-clock time."""
        for task_id in (0, 1, 3):
            fake_env = {
                'TF_CONFIG': json.dumps({'task': {'index': task_id}})
            }
            # TF_CONFIG only needs to be present while the RunConfig is built.
            with patch.dict('os.environ', fake_env):
                config = run_config.RunConfig()
            estimator = TestEstimator(config)
            experiment_under_test = tf.contrib.learn.Experiment(
                estimator,
                train_input_fn='train_input',
                eval_input_fn='eval_input')

            began_at = time.time()
            experiment_under_test.train()
            elapsed = time.time() - began_at
            # Real time is measured, so allow a full second of slack.
            self.assertAlmostEqual(elapsed, task_id * 5, delta=1.0)
Example #30
0
    def test_train_default_delay(self):
        """train() should accumulate about task_id * 5 on the SheepCounter.

        time.sleep is replaced by a SheepCounter instance while train()
        runs; the check reads that instance's total_time.
        """
        for task_id in (0, 1, 3):
            fake_env = {
                'TF_CONFIG': json.dumps({'task': {'index': task_id}})
            }
            # TF_CONFIG only needs to be present while the RunConfig is built.
            with test.mock.patch.dict('os.environ', fake_env):
                config = run_config.RunConfig()
            estimator = TestEstimator(config)
            ex = experiment.Experiment(
                estimator,
                train_input_fn='train_input',
                eval_input_fn='eval_input')

            sheep = SheepCounter()
            with test.mock.patch('time.sleep', sheep):
                ex.train()
                self.assertAlmostEqual(
                    task_id * 5, sheep.total_time, delta=0.1)