Exemple #1
0
    def run(self):
        """Drive the worker from start-up, through its check-in loop, to shutdown.

        Start-up: the image manager's cleanup thread is started when
        ``_max_images_bytes`` is set, and the dependency manager's cleanup
        thread is started when the worker is not on a shared file system.

        Loop (while ``_should_run()`` is true): check in, resume previously
        known runs, and save the worker state. Any ``Exception`` raised during
        an iteration marks the check-in as unsuccessful, prints the traceback
        and pauses for one second before the next attempt.

        Shutdown: check out, save the state once more, stop the cleanup
        threads under the same conditions they were started, and finally run
        the upgrade when ``_should_upgrade`` is set.
        """

        def rebuild_run(run_info):
            # Turn a persisted run description back into a Run object.
            return Run.deserialize(
                self._bundle_service,
                self._docker,
                self._image_manager,
                self,
                run_info,
            )

        # --- start-up -----------------------------------------------------
        if self._max_images_bytes is not None:
            self._image_manager.start_cleanup_thread()
        if not self.shared_file_system:
            self._dependency_manager.start_cleanup_thread()

        # --- main loop ----------------------------------------------------
        while self._should_run():
            try:
                self._checkin()
                self._worker_state_manager.resume_previous_runs(rebuild_run)
                self._worker_state_manager.save_state()
                # Announce the connection only on a failed -> successful
                # transition, not on every check-in.
                if not self._last_checkin_successful:
                    logger.info('Connected! Successful check in.')
                self._last_checkin_successful = True
            except Exception:
                # Keep the worker alive: record the failure, report it and
                # back off briefly before trying again.
                self._last_checkin_successful = False
                traceback.print_exc()
                time.sleep(1)

        # --- shutdown -----------------------------------------------------
        self._checkout()
        self._worker_state_manager.save_state()

        if self._max_images_bytes is not None:
            self._image_manager.stop_cleanup_thread()
        if not self.shared_file_system:
            self._dependency_manager.stop_cleanup_thread()

        if self._should_upgrade:
            self._upgrade()
Exemple #2
0
        def resume_previous_runs():
            """Resume the runs known to the state manager and reserve their CPUs/GPUs.

            Each persisted ``run_info`` is turned back into a ``Run`` through
            ``Run.deserialize``. Then, while holding ``self._resource_lock``,
            every cpu and gpu id assigned to a resumed run is taken out of the
            worker's free pools, and finally the worker state is saved.

            An id that is not part of the worker's own cpuset/gpuset is only
            reported at debug level. An id that belongs to the worker but is no
            longer in the free pool (for example because two resumed runs list
            the same id) is also just reported instead of letting ``remove``
            raise, so the remaining runs are still accounted for and the state
            is still saved.
            """
            self._worker_state_manager.resume_previous_runs(
                lambda run_info: Run.deserialize(
                    self._bundle_service, self._docker, self._image_manager,
                    self, run_info))

            # For each resumed run, remove the assigned cpus and gpus from the
            # free pools. The lock is held for the whole update.
            with self._resource_lock:
                run_sets = self._worker_state_manager.map_runs(
                    lambda run: (run._cpuset, run._gpuset))
                for cpuset, gpuset in run_sets:
                    for k in cpuset:
                        if k not in self._cpuset:
                            logger.debug(
                                'Warning: cpu {} not in worker cpuset'.format(
                                    k))
                        elif k in self._cpuset_free:
                            self._cpuset_free.remove(k)
                        else:
                            # Already reserved: removing again would raise.
                            logger.debug(
                                'Warning: cpu {} already removed from free '
                                'cpuset'.format(k))

                    for k in gpuset:
                        if k not in self._gpuset:
                            logger.debug(
                                'Warning: gpu {} not in worker gpuset'.format(
                                    k))
                        elif k in self._gpuset_free:
                            self._gpuset_free.remove(k)
                        else:
                            # Already reserved: removing again would raise.
                            logger.debug(
                                'Warning: gpu {} already removed from free '
                                'gpuset'.format(k))

            self._worker_state_manager.save_state()