Esempio n. 1
0
    def __init__(self):
        """
        Initialize the in-memory state, reset the results directory, and start the background worker threads.

        The worker threads are created and started last so that their loops can never run against a partially
        initialized instance (for example, before ``self._idle_slaves`` has been assigned).
        """
        self._logger = get_logger(__name__)

        self._all_slaves_by_url = {}
        self._all_builds_by_id = OrderedDict()  # This is an OrderedDict so we can more easily implement get_queue()
        self._builds_waiting_for_slaves = Queue()

        self._request_queue = Queue()
        self._request_handler = SerialRequestHandler()

        self._master_results_path = Configuration['results_directory']

        # It's important that idle slaves are only in the queue once so we use OrderedSet
        self._idle_slaves = OrderedSetQueue()

        # Asynchronously delete (but immediately rename) all old builds when master starts.
        # Remove this if/when build numbers are unique across master starts/stops
        if os.path.exists(self._master_results_path):
            fs.async_delete(self._master_results_path)

        fs.create_dir(self._master_results_path)

        # Start the workers only after every attribute above exists and the results directory is in place; starting
        # them earlier would let a loop touch attributes that have not been assigned yet.
        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
        self._request_queue_worker_thread.start()

        self._slave_allocation_worker_thread = SafeThread(
            target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)
        self._slave_allocation_worker_thread.start()
 def post(self):
     """
     Write the status, then start a separate thread that calls the cluster slave's kill().
     """
     self._write_status()
     # The kill runs on its own thread rather than on the thread executing this handler.
     SafeThread(name='kill-thread', target=self._cluster_slave.kill).start()
Esempio n. 3
0
    def setup_build(self, build_id, project_type_params, build_executor_start_index):
        """
        Perform build-specific setup; usually called once per build. Subjobs are blocked from executing until setup
        completes. The setup work itself runs on a separate thread, which unblocks subjobs (via an Event) when it
        is done.

        :param build_id: The id of the build to run setup on
        :type build_id: int
        :param project_type_params: The parameters that define the project_type this build will execute in
        :type project_type_params: dict
        :param build_executor_start_index: How many executors have already been allocated on other slaves for
        this build
        :type build_executor_start_index: int
        :raises RuntimeError: if not all executors are idle when setup is requested
        """
        self._logger.info('Executing setup for build {} (type: {}).', build_id, project_type_params.get('type'))
        self._current_build_id = build_id
        # The coin protects against build_teardown being executed multiple times.
        self._build_teardown_coin = SingleUseCoin()

        # Build-level operations use their own project_type instance.
        self._project_type = util.create_project_type(project_type_params)

        # Setup may only proceed when every executor is idle.
        all_executors_idle = self._idle_executors.full()
        if not all_executors_idle:
            error_template = 'Slave tried to setup build but not all executors are idle. ({}/{} executors idle.)'
            raise RuntimeError(error_template.format(self._idle_executors.qsize(), self._num_executors))

        # Hand a snapshot of all executors to the setup thread, which passes them to project_type.fetch_project().
        # That creates a new project_type for each executor (for subjob-level operations).
        idle_executors = list(self._idle_executors.queue)
        setup_thread = SafeThread(
            target=self._async_setup_build,
            name='Bld{}-Setup'.format(build_id),
            args=(idle_executors, project_type_params, build_executor_start_index),
        )
        setup_thread.start()
Esempio n. 4
0
    def test_executing_build_teardown_multiple_times_will_raise_exception(
            self):
        """
        A second build teardown, issued while a first teardown is still in progress on another thread, must raise
        BuildTeardownError.
        """
        self.mock_network.post().status_code = http.client.OK
        slave = self._create_cluster_slave()
        project_type_mock = self.patch(
            'app.slave.cluster_slave.util.create_project_type').return_value
        # This test uses setup_complete_event to detect when the async fetch_project() has executed.
        setup_complete_event = Event()
        project_type_mock.fetch_project.side_effect = self.no_args_side_effect(
            setup_complete_event.set)
        # This test uses teardown_event to cause a thread to block on the teardown_build() call.
        teardown_event = Event()
        project_type_mock.teardown_build.side_effect = self.no_args_side_effect(
            teardown_event.wait)

        slave.connect_to_master(self._FAKE_MASTER_URL)
        slave.setup_build(build_id=123, project_type_params={'type': 'Fake'})
        self.assertTrue(setup_complete_event.wait(timeout=5),
                        'Build setup should complete very quickly.')

        # Start the first thread that does build teardown. This thread will block on teardown_build().
        first_thread = SafeThread(target=slave._do_build_teardown_and_reset)
        first_thread.start()
        try:
            # Call build teardown() again and it should raise an exception.
            with self.assertRaises(BuildTeardownError):
                slave._do_build_teardown_and_reset()
        finally:
            # Cleanup: Unblock the first thread and let it finish. This runs in a finally block so that a failed
            # assertion above cannot leave the first thread blocked on teardown_event forever. We use the unhandled
            # exception handler just in case any exceptions occurred on the thread (so that they'd be passed back
            # to the main thread and fail the test).
            teardown_event.set()
            with UnhandledExceptionHandler.singleton():
                first_thread.join()
Esempio n. 5
0
    def test_calling_kill_subprocesses_will_break_out_of_command_execution_wait_loop(
            self):
        """
        kill_subprocesses() called from one thread should make execute_command_in_project() on another thread exit.
        """
        self._mock_stdout_and_stderr(b'fake_output', b'fake_error')
        self.mock_popen.pid = 55555
        self._simulate_hanging_popen_process()

        project_type = ProjectType()
        command_args = ('echo The power is yours!',)
        worker = SafeThread(target=project_type.execute_command_in_project, args=command_args)

        # execute_command_in_project() runs on the worker thread while kill_subprocesses() is called from this one.
        # The kill_subprocesses() call should cause the worker thread to exit.
        worker.start()
        project_type.kill_subprocesses()

        # The join is expected to return right away; the timeout only exists so a regression cannot hang the test.
        # A successful join implies success. The UnhandledExceptionHandler lets exceptions from the worker thread
        # propagate to the test thread and fail the test.
        with UnhandledExceptionHandler.singleton():
            worker.join(timeout=10)
            worker_still_running = worker.is_alive()
            if worker_still_running:
                # Calling killpg() causes the command thread to end.
                self.mock_killpg()
                self.fail('project_type.kill_subprocesses should cause the command execution wait loop to exit.')

        # Note: os.killpg does not accept keyword args.
        self.mock_killpg.assert_called_once_with(55555, ANY)
Esempio n. 6
0
    def start_working_on_subjob(self, build_id, subjob_id, subjob_artifact_dir, atomic_commands):
        """
        Start executing the given subjob of the given build. The execution itself happens asynchronously on a
        separate thread; this method only claims an executor and launches that thread.

        :type build_id: int
        :type subjob_id: int
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        :return: The text to return in the API response.
        :rtype: dict[str, int]
        :raises BadRequestError: if build_id does not match the slave's current build id
        """
        if build_id != self._current_build_id:
            error_message = 'Attempted to start subjob {} for build {}, but current build id is {}.'.format(
                subjob_id, build_id, self._current_build_id)
            raise BadRequestError(error_message)

        # Taking an executor off the idle queue claims it as in-use (this blocks until one is available).
        executor = self._idle_executors.get()

        # The job runs on its own thread (after waiting for setup to complete).
        subjob_thread = SafeThread(
            target=self._execute_subjob,
            args=(build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands),
            name='Bld{}-Sub{}'.format(build_id, subjob_id),
        )
        subjob_thread.start()

        self._logger.info(
            'Slave ({}:{}) has received subjob. (Build {}, Subjob {})', self.host, self.port, build_id, subjob_id)
        return {'executor_id': executor.id}
Esempio n. 7
0
    def start_subjob(self, subjob):
        """
        Start the given subjob on a new thread that runs self._async_start_subjob.

        :type subjob: Subjob
        :raises RuntimeError: if this slave is not alive
        """
        if not self.is_alive():
            dead_slave_message = 'Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id)
            raise RuntimeError(dead_slave_message)

        start_thread = SafeThread(target=self._async_start_subjob, args=(subjob,))
        start_thread.start()
Esempio n. 8
0
 def run(self, *args, **kwargs):
     """
     Run self.async_run on a dedicated thread named self._THREAD_NAME and block until that thread finishes.

     All positional and keyword arguments are forwarded to self.async_run unchanged.
     """
     runner = SafeThread(name=self._THREAD_NAME, target=self.async_run, args=args, kwargs=kwargs)
     runner.start()
     # Wait for the thread to finish before returning to the caller.
     runner.join()
 def __init__(self):
     """
     Create the logger, the two build queues, the preparation worker thread, and the project lock registry.
     """
     self._logger = get_logger(__name__)
     self._builds_waiting_for_slaves = Queue()
     self._request_queue = Queue()
     # The worker thread is only constructed here; this constructor does not call start() on it.
     worker = SafeThread(target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
     self._request_queue_worker_thread = worker
     # Starts out empty. Presumably keyed by project id -- confirm against the preparation loop.
     self._project_preparation_locks = {}
Esempio n. 10
0
 def __init__(self, build_request_handler):
     """
     Keep a reference to the build request handler and create the idle-slave queue and the allocation thread.

     :type build_request_handler: BuildRequestHandler
     """
     self._logger = get_logger(__name__)
     self._build_request_handler = build_request_handler
     self._idle_slaves = OrderedSetQueue()
     # The allocation thread is only constructed here; this constructor does not call start() on it.
     allocation_thread = SafeThread(target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)
     self._allocation_thread = allocation_thread
 def __init__(self, scheduler_pool):
     """
     Keep a reference to the scheduler pool and create the idle-slave queue and the allocation thread.

     :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._idle_slaves = OrderedSetQueue()
     # The allocation thread is only constructed here; this constructor does not call start() on it.
     allocation_thread = SafeThread(target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)
     self._allocation_thread = allocation_thread
Esempio n. 12
0
 def __init__(self, scheduler_pool):
     """
     Keep a reference to the scheduler pool and create the request queue, the preparation worker thread, and the
     project lock registry.

     :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._request_queue = Queue()
     # The worker thread is only constructed here; this constructor does not call start() on it.
     worker = SafeThread(target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
     self._request_queue_worker_thread = worker
     # Starts out empty. Presumably keyed by project id -- confirm against the preparation loop.
     self._project_preparation_locks = {}
Esempio n. 13
0
    def start_subjob(self, subjob):
        """
        Start the given subjob on a new thread that runs self._async_start_subjob.

        :type subjob: Subjob
        :raises DeadSlaveError: if this slave is not alive
        :raises SlaveMarkedForShutdownError: if this slave is in shutdown mode
        """
        if not self.is_alive():
            dead_slave_message = 'Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id)
            raise DeadSlaveError(dead_slave_message)

        if self._is_in_shutdown_mode:
            shutdown_message = 'Tried to start a subjob on a slave in shutdown mode. ({}, id: {})'.format(
                self.url, self.id)
            raise SlaveMarkedForShutdownError(shutdown_message)

        start_thread = SafeThread(target=self._async_start_subjob, args=(subjob,))
        start_thread.start()
 def __init__(self, scheduler_pool):
     """
     Keep a reference to the scheduler pool and create the build queues, the preparation worker thread, the
     project lock registry, and the subjob calculator.

     :type scheduler_pool: BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._builds_waiting_for_slaves = Queue()
     self._request_queue = Queue()
     # The worker thread is only constructed here; this constructor does not call start() on it.
     worker = SafeThread(target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
     self._request_queue_worker_thread = worker
     # Starts out empty. Presumably keyed by project id -- confirm against the preparation loop.
     self._project_preparation_locks = {}
     self._subjob_calculator = SubjobCalculator()
    def test_exception_on_safe_thread_calls_teardown_callbacks(self):
        """
        An exception raised on a SafeThread should invoke the registered teardown callback with its registered
        positional and keyword arguments.
        """
        teardown_callback = MagicMock()
        handler = UnhandledExceptionHandler.singleton()
        handler.add_teardown_callback(teardown_callback, 'fake arg', fake_kwarg='boop')

        def raise_on_thread():
            raise Exception('Sic semper tyrannis!')

        failing_thread = SafeThread(target=raise_on_thread)
        failing_thread.start()
        failing_thread.join()

        teardown_callback.assert_called_once_with('fake arg', fake_kwarg='boop')
Esempio n. 16
0
    def mark_subjob_complete(self, subjob_id):
        """
        Record the subjob with the given id as finished and, if self._subjobs_are_finished is then truthy, start the
        postbuild tasks on a separate thread.

        :type subjob_id: int
        """
        completed_subjob = self._all_subjobs_by_id[int(subjob_id)]
        with self._build_completion_lock:
            self._finished_subjobs.put(completed_subjob, block=False)
            # Capture the flag while still holding the lock; using this local afterwards prevents a race condition.
            all_results_received = self._subjobs_are_finished

        if not all_results_received:
            return

        self._logger.info("All results received for build {}!", self._build_id)
        postbuild_thread = SafeThread(
            target=self._perform_async_postbuild_tasks,
            name='PostBuild{}'.format(self._build_id),
        )
        postbuild_thread.start()
Esempio n. 17
0
    def start_subjob(self, subjob):
        """
        Start the given subjob on a new thread that runs self._async_start_subjob.

        :type subjob: Subjob
        :raises DeadSlaveError: if this slave is not alive
        :raises SlaveMarkedForShutdownError: if this slave is in shutdown mode
        """
        if not self.is_alive():
            dead_slave_message = 'Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id)
            raise DeadSlaveError(dead_slave_message)

        if self._is_in_shutdown_mode:
            shutdown_message = 'Tried to start a subjob on a slave in shutdown mode. ({}, id: {})'.format(
                self.url, self.id)
            raise SlaveMarkedForShutdownError(shutdown_message)

        # todo: This should not be a SafeThread. https://github.com/box/ClusterRunner/issues/337
        start_thread = SafeThread(target=self._async_start_subjob, args=(subjob,))
        start_thread.start()
Esempio n. 18
0
    def teardown_build(self, build_id=None):
        """
        Called at the end of each build on each slave before it reports back to the master that it is idle again.
        The teardown itself runs asynchronously on a separate thread.

        :param build_id: The build id to teardown -- this parameter is used solely for correctness checking of the
            master, to make sure that the master is not erroneously sending teardown commands for other builds.
        :type build_id: int | None
        :raises BadRequestError: if no build is active on this slave, or if build_id is given and does not match
            the build currently running on this slave
        """
        if self._current_build_id is None:
            raise BadRequestError('Tried to teardown a build but no build is active on this slave.')

        if build_id is not None and build_id != self._current_build_id:
            raise BadRequestError('Tried to teardown build {}, '
                                  'but slave is running build {}!'.format(build_id, self._current_build_id))

        # build_id is optional, so the thread is named after the build actually being torn down. Formatting the
        # parameter instead would yield "BldNone-Teardwn" whenever the caller omits it. At this point build_id is
        # either None or equal to self._current_build_id, so the name is unchanged for callers that pass an id.
        SafeThread(
            target=self._async_teardown_build,
            name='Bld{}-Teardwn'.format(self._current_build_id)
        ).start()
    def _build_preparation_loop(self):
        """
        Loop forever: take a build off the request_queue (populated by self.handle_build_request()) and start a
        thread that prepares it and puts it onto the self.builds_waiting_for_slaves queue.

        Each preparation thread is given the lock registered for the build's project id; the lock is created the
        first time a project id is seen.
        """
        while True:
            build = self._request_queue.get()
            project_id = build.project_type.project_id()

            try:
                project_lock = self._project_preparation_locks[project_id]
            except KeyError:
                # First build seen for this project: register a new lock for it.
                self._logger.info('Creating project lock [{}] for build {}', project_id, str(build.build_id()))
                project_lock = Lock()
                self._project_preparation_locks[project_id] = project_lock

            preparation_thread = SafeThread(
                target=self._prepare_build_async,
                name='Bld{}-PreparationThread'.format(build.build_id()),
                args=(build, project_lock),
            )
            preparation_thread.start()
    def test_normal_execution_on_safe_thread_does_not_call_teardown_callbacks(
            self):
        """
        A SafeThread whose target returns normally should leave the registered teardown callback uncalled.
        """
        teardown_callback = MagicMock()
        handler = UnhandledExceptionHandler.singleton()
        handler.add_teardown_callback(teardown_callback)

        def succeed_on_thread():
            print('Veritas vos liberabit!')

        succeeding_thread = SafeThread(target=succeed_on_thread)
        succeeding_thread.start()
        succeeding_thread.join()

        failure_message = 'The teardown callback should not be called unless an exception is raised.'
        self.assertFalse(teardown_callback.called, failure_message)
Esempio n. 21
0
 def start_heartbeat_thread(self):
     """
     Log the configured heartbeat interval and start a daemon thread that runs self._start_heartbeat.
     """
     interval_message = 'Heartbeat will run every {} seconds'.format(self._heartbeat_interval)
     self._logger.info(interval_message)
     heartbeat_thread = SafeThread(target=self._start_heartbeat, name='HeartbeatThread', daemon=True)
     heartbeat_thread.start()