Code example #1
    def on_step_end(self, iteration, trials, **info):
        num_finished = len([
            t for t in trials
            if t.status == Trial.TERMINATED or t.status == Trial.ERROR
        ])
        num_running = len([t for t in trials if t.status == Trial.RUNNING])

        # Internal bookkeeping from the trial executor's placement group
        # manager: staged, ready, in-use and cached placement groups.
        num_staging = sum(
            len(s) for s in trial_executor._pg_manager._staging.values())
        num_ready = sum(
            len(s) for s in trial_executor._pg_manager._ready.values())
        num_in_use = len(trial_executor._pg_manager._in_use_pgs)
        num_cached = len(trial_executor._pg_manager._cached_pgs)

        total_num_tracked = num_staging + num_ready + \
            num_in_use + num_cached

        num_non_removed_pgs = len([
            p for pid, p in placement_group_table().items()
            if p["state"] != "REMOVED"
        ])
        num_removal_scheduled_pgs = len(
            trial_executor._pg_manager._pgs_for_removal)

        # All 3 trials (3 different learning rates) should be scheduled.
        assert 3 == min(3, len(trials))
        # Cannot run more than 2 at a time
        # (due to different resource restrictions in the test cases).
        assert num_running <= 2
        # The number of placement groups should decrease
        # when trials finish.
        assert max(3, len(trials)) - num_finished == total_num_tracked
        # The number of actual placement groups should match this.
        assert max(3, len(trials)) - num_finished == \
            num_non_removed_pgs - num_removal_scheduled_pgs
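The on_step_end hook above is a Ray Tune Callback method that fires after every scheduling step and receives the current list of trials. A minimal sketch of how such a hook is attached to an experiment is shown below; the trainable train_fn, the three learning rates, and the callback name are illustrative assumptions, not part of the test above.

import ray
from ray import tune
from ray.tune import Callback


class PGCountCallback(Callback):
    def on_step_end(self, iteration, trials, **info):
        # Trial.TERMINATED / Trial.ERROR are plain strings, so comparing
        # against string literals matches the constant comparison above.
        num_finished = len(
            [t for t in trials if t.status in ("TERMINATED", "ERROR")])
        print(f"iter={iteration}: {num_finished}/{len(trials)} finished")


def train_fn(config):
    # Trivial trainable used only for illustration.
    tune.report(loss=config["lr"])


ray.init(num_cpus=2)  # at most 2 of the 3 trials can run concurrently
tune.run(
    train_fn,
    config={"lr": tune.grid_search([0.01, 0.1, 1.0])},
    callbacks=[PGCountCallback()],
)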
Code example #2
    def schedule_fold_model_fit(self, model_base, fold_ctx, kwargs):
        # Put the task arguments into the object store up front so they are
        # serialized only once.
        args = [model_base, fold_ctx, kwargs]
        args_refs = [ray.put(arg) for arg in args]
        print('...model_fit')

        # Reserve a single 2-CPU bundle; STRICT_SPREAD requires every bundle
        # to land on a different node.
        pg = placement_group([{"CPU": 2}], strategy="STRICT_SPREAD")
        # Block until the placement group has actually been created.
        ray.get(pg.ready())
        print(placement_group_table(pg))
        # Schedule the remote fit task inside the placement group.
        results_ref = model_fit_task_ray.options(placement_group=pg).remote(
            *args_refs)
        # time_start_fold and on_fit_end_fn come from the enclosing scope
        # (not shown in this excerpt).
        self.jobs.append((results_ref, time_start_fold, on_fit_end_fn))
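Example #2 reserves a placement group for every scheduled fit but never releases it. A minimal, self-contained sketch of the full placement group lifecycle (create, wait for it to become ready, schedule a task into it, then remove it) follows; heavy_task and the resource sizes are illustrative assumptions.

import ray
from ray.util.placement_group import (
    placement_group,
    placement_group_table,
    remove_placement_group,
)


@ray.remote(num_cpus=2)
def heavy_task(x):
    # Illustrative workload running inside the reserved bundle.
    return x * 2


ray.init(num_cpus=2)

# Reserve a single 2-CPU bundle and wait until it has been created.
pg = placement_group([{"CPU": 2}], strategy="STRICT_SPREAD")
ray.get(pg.ready())
print(placement_group_table(pg))

# The placement_group option mirrors the legacy API used in the examples
# above; newer Ray versions schedule via PlacementGroupSchedulingStrategy.
result = ray.get(heavy_task.options(placement_group=pg).remote(21))
assert result == 42

# Release the reserved resources once the work is done.
remove_placement_group(pg)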
Code example #3
File: test_trial_runner_pg.py  Project: ijrsvt/ray
            def on_step_end(self, iteration, trials, **info):
                num_finished = len([
                    t for t in trials
                    if t.status == Trial.TERMINATED or t.status == Trial.ERROR
                ])

                num_staging = sum(
                    len(s)
                    for s in trial_executor._pg_manager._staging.values())
                num_ready = sum(
                    len(s) for s in trial_executor._pg_manager._ready.values())
                num_in_use = len(trial_executor._pg_manager._in_use_pgs)
                num_cached = len(trial_executor._pg_manager._cached_pgs)

                total_num_tracked = num_staging + num_ready + num_in_use + num_cached

                num_non_removed_pgs = len([
                    p for pid, p in placement_group_table().items()
                    if p["state"] != "REMOVED"
                ])
                num_removal_scheduled_pgs = len(
                    trial_executor._pg_manager._pgs_for_removal)

                # All trials should be scheduled
                this.assertEqual(
                    scheduled,
                    min(scheduled, len(trials)),
                    msg=f"Num trials iter {iteration}",
                )

                # The following two tests were relaxed for reuse_actors=True
                # so that up to `max_num_parallel` more placement groups can
                # exist than we would expect. This is because caching
                # relies on reconciliation for cleanup to avoid overscheduling
                # of new placement groups.
                num_parallel_reuse = int(reuse_actors) * max_num_parallel

                # The number of PGs should decrease when trials finish
                this.assertGreaterEqual(
                    max(scheduled, len(trials)) - num_finished +
                    num_parallel_reuse,
                    total_num_tracked,
                    msg=f"Num tracked iter {iteration}",
                )

                # The number of actual placement groups should match this
                this.assertGreaterEqual(
                    max(scheduled, len(trials)) - num_finished +
                    num_parallel_reuse,
                    num_non_removed_pgs - num_removal_scheduled_pgs,
                    msg=f"Num actual iter {iteration}",
                )
Code example #4
    def _assertCleanup(self, trial_executor):
        # Assert proper cleanup
        pg_manager = trial_executor._pg_manager
        self.assertFalse(pg_manager._in_use_trials)
        self.assertFalse(pg_manager._in_use_pgs)
        self.assertFalse(pg_manager._staging_futures)
        for pgf in pg_manager._staging:
            self.assertFalse(pg_manager._staging[pgf])
        for pgf in pg_manager._ready:
            self.assertFalse(pg_manager._ready[pgf])

        # After cleanup, no placement group should remain in any state other
        # than REMOVED.
        num_non_removed_pgs = len([
            p for pid, p in placement_group_table().items()
            if p["state"] != "REMOVED"
        ])
        self.assertEqual(num_non_removed_pgs, 0)
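The cleanup assertion above reaches into Ray Tune internals (_pg_manager). For tests that only need to detect leaked placement groups, the public placement_group_table() API is sufficient; the helper below is a hypothetical sketch, not part of the test file above.

from ray.util.placement_group import placement_group_table


def assert_no_leaked_placement_groups():
    # Hypothetical helper: fail if any placement group is still alive,
    # i.e. not yet in the REMOVED state.
    leaked = [
        pid for pid, p in placement_group_table().items()
        if p["state"] != "REMOVED"
    ]
    assert not leaked, f"Leaked placement groups: {leaked}"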
Code example #5
            def on_step_end(self, iteration, trials, **info):
                num_finished = len([
                    t for t in trials
                    if t.status == Trial.TERMINATED or t.status == Trial.ERROR
                ])

                num_staging = sum(
                    len(s)
                    for s in trial_executor._pg_manager._staging.values())
                num_ready = sum(
                    len(s) for s in trial_executor._pg_manager._ready.values())
                num_in_use = len(trial_executor._pg_manager._in_use_pgs)
                num_cached = len(trial_executor._pg_manager._cached_pgs)

                total_num_tracked = num_staging + num_ready + \
                    num_in_use + num_cached

                num_non_removed_pgs = len([
                    p for pid, p in placement_group_table().items()
                    if p["state"] != "REMOVED"
                ])
                num_removal_scheduled_pgs = len(
                    trial_executor._pg_manager._pgs_for_removal)

                # All trials should be scheduled
                this.assertEqual(
                    scheduled,
                    min(scheduled, len(trials)),
                    msg=f"Num trials iter {iteration}")
                # The number of PGs should decrease when trials finish
                this.assertEqual(
                    max(scheduled, len(trials)) - num_finished,
                    total_num_tracked,
                    msg=f"Num tracked iter {iteration}")
                # The number of actual placement groups should match this
                this.assertEqual(
                    max(scheduled, len(trials)) - num_finished,
                    num_non_removed_pgs - num_removal_scheduled_pgs,
                    msg=f"Num actual iter {iteration}")