    def test_calling_kill_subprocesses_will_break_out_of_command_execution_wait_loop(self):

        def fake_communicate(timeout=None):
            # Fake implementation: communicate() times out forever until os.killpg has been called.
            if mock_killpg.call_count == 0 and timeout is not None:
                raise TimeoutExpired(None, timeout)
            elif mock_killpg.call_count > 0:
                return b'fake output', b'fake error'
            self.fail('Popen.communicate() should not be called without a timeout before os.killpg has been called.')

        mock_killpg = self.patch('os.killpg')
        self.mock_popen.communicate.side_effect = fake_communicate
        self.mock_popen.returncode = 1
        self.mock_popen.pid = 55555
        project_type = ProjectType()
        command_thread = SafeThread(target=project_type.execute_command_in_project, args=('echo The power is yours!',))

        # This calls execute_command_in_project() on one thread, and calls kill_subprocesses() on another. The
        # kill_subprocesses() call should cause the first thread to exit.
        command_thread.start()
        project_type.kill_subprocesses()

        # This *should* join immediately, but we specify a timeout just in case something goes wrong so that the test
        # doesn't hang. A successful join implies success. We also use the UnhandledExceptionHandler so that exceptions
        # propagate from the child thread to the test thread and fail the test.
        with UnhandledExceptionHandler.singleton():
            command_thread.join(timeout=10)
            if command_thread.is_alive():
                mock_killpg()  # Calling killpg() causes the command thread to end.
                self.fail('project_type.kill_subprocesses should cause the command execution wait loop to exit.')

        mock_killpg.assert_called_once_with(55555, ANY)  # Note: os.killpg does not accept keyword args.
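
This test only makes sense if execute_command_in_project() polls Popen.communicate() with a short timeout and breaks out once kill_subprocesses() has signaled the process group. A minimal sketch of such a wait loop, under those assumptions; the names _wait_for_process, pipe, and kill_event are illustrative, not the project's actual API.

import os
import signal
from subprocess import TimeoutExpired

def _wait_for_process(pipe, kill_event, poll_interval=1):
    """Illustrative only: poll communicate() until the process exits or is killed."""
    while True:
        try:
            # Returns as soon as the process exits (including after being killed).
            return pipe.communicate(timeout=poll_interval)
        except TimeoutExpired:
            if kill_event.is_set():
                # kill_subprocesses() was called: signal the whole process group,
                # assuming the child was started as its own group leader (e.g. via
                # preexec_fn=os.setsid). os.killpg only accepts positional arguments.
                os.killpg(pipe.pid, signal.SIGTERM)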
Example 2
class SlaveAllocator(object):
    """
    The SlaveAllocator class is responsible for allocating slaves to prepared builds.
    """

    def __init__(self, build_request_handler):
        """
        :type build_request_handler: BuildRequestHandler
        """
        self._logger = get_logger(__name__)
        self._build_request_handler = build_request_handler
        self._idle_slaves = OrderedSetQueue()
        self._allocation_thread = SafeThread(
            target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)

    def start(self):
        """
        Start the infinite loop that will pull prepared builds off a synchronized queue
        and allocate slaves to them.
        """
        if self._allocation_thread.is_alive():
            raise RuntimeError('Error: slave allocation loop was asked to start when it is already running.')
        self._allocation_thread.start()

    def _slave_allocation_loop(self):
        """
        Builds wait in line for more slaves. This method executes in the background on another thread and
        watches for idle slaves, then gives them out to the waiting builds.
        """
        while True:
            # This is a blocking call that will block until there is a prepared build.
            build_waiting_for_slave = self._build_request_handler.next_prepared_build()

            while build_waiting_for_slave.needs_more_slaves():
                claimed_slave = self._idle_slaves.get()

                # Remove dead slaves from the idle queue
                if not claimed_slave.is_alive(use_cached=False):
                    continue

                # The build may have completed while we were waiting for an idle slave, so check one more time.
                if build_waiting_for_slave.needs_more_slaves():
                    # Potential race condition here!  If the build completes after the if statement is checked,
                    # a slave will be allocated needlessly (and run slave.setup(), which can be significant work).
                    self._logger.info('Allocating slave {} to build {}.',
                                      claimed_slave.url, build_waiting_for_slave.build_id())
                    build_waiting_for_slave.allocate_slave(claimed_slave)
                else:
                    self.add_idle_slave(claimed_slave)

            self._logger.info('Done allocating slaves for build {}.', build_waiting_for_slave.build_id())

    def add_idle_slave(self, slave):
        """
        Add a slave to the idle queue.

        :type slave: Slave
        """
        slave.mark_as_idle()
        self._idle_slaves.put(slave)
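
For context, a hypothetical wiring of the two classes above (not taken from the repository): the master constructs both objects, starts their background loops, and returns slaves to the idle pool as they connect or finish work. The `connected_slave` name is an illustrative Slave instance.

handler = BuildRequestHandler()
allocator = SlaveAllocator(handler)
handler.start()     # begins preparing queued builds
allocator.start()   # begins matching idle slaves to prepared builds

# When a slave connects (or finishes its work), the master returns it to the pool:
allocator.add_idle_slave(connected_slave)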
Example 3
    def test_calling_kill_subprocesses_will_break_out_of_command_execution_wait_loop(self):
        self._mock_stdout_and_stderr(b'fake_output', b'fake_error')
        self.mock_popen.pid = 55555
        self._simulate_hanging_popen_process()

        project_type = ProjectType()
        command_thread = SafeThread(target=project_type.execute_command_in_project, args=('echo The power is yours!',))

        # This calls execute_command_in_project() on one thread, and calls kill_subprocesses() on another. The
        # kill_subprocesses() call should cause the first thread to exit.
        command_thread.start()
        project_type.kill_subprocesses()

        # This *should* join immediately, but we specify a timeout just in case something goes wrong so that the test
        # doesn't hang. A successful join implies success. We also use the UnhandledExceptionHandler so that exceptions
        # propagate from the child thread to the test thread and fail the test.
        with UnhandledExceptionHandler.singleton():
            command_thread.join(timeout=10)
            if command_thread.is_alive():
                self.mock_killpg()  # Calling killpg() causes the command thread to end.
                self.fail('project_type.kill_subprocesses should cause the command execution wait loop to exit.')

        self.mock_killpg.assert_called_once_with(55555, ANY)  # Note: os.killpg does not accept keyword args.
    def test_calling_kill_subprocesses_will_break_out_of_command_execution_wait_loop(self):
        self._mock_out_popen_communicate()

        project_type = ProjectType()
        command_thread = SafeThread(target=project_type.execute_command_in_project, args=('echo The power is yours!',))

        # This calls execute_command_in_project() on one thread, and calls kill_subprocesses() on another. The
        # kill_subprocesses() call should cause the first thread to exit.
        command_thread.start()
        project_type.kill_subprocesses()

        # This *should* join immediately, but we specify a timeout just in case something goes wrong so that the test
        # doesn't hang. A successful join implies success. We also use the UnhandledExceptionHandler so that exceptions
        # propagate from the child thread to the test thread and fail the test.
        with UnhandledExceptionHandler.singleton():
            command_thread.join(timeout=10)
            if command_thread.is_alive():
                self.mock_killpg()  # Calling killpg() causes the command thread to end.
                self.fail('project_type.kill_subprocesses should cause the command execution wait loop to exit.')

        self.mock_killpg.assert_called_once_with(55555, ANY)  # Note: os.killpg does not accept keyword args.
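
The note about keyword arguments is easy to verify: os.killpg is a positional-only builtin, which is why these assertions use (55555, ANY) rather than keywords. A short demonstration (the call below is hypothetical and raises before any signal is sent):

import os
import signal

try:
    os.killpg(pgid=55555, sig=signal.SIGTERM)  # hypothetical call, for demonstration
except TypeError as err:
    print(err)  # TypeError is raised before any signal is sent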
class BuildRequestHandler(object):
    """
    The BuildRequestHandler class is responsible for preparing a non-prepared build.

    Implementation notes:

    This class manages two critical Queues in ClusterRunner: request_queue and builds_waiting_for_slaves.

    The request_queue is the queue of non-prepared Build instances that the BuildRequestHandler has
    yet to prepare. This queue is populated by the ClusterMaster instance.

    The builds_waiting_for_slaves queue is the queue of prepared Build instances that the
    BuildRequestHandler has completed build preparation for, and is waiting for the SlaveAllocator (a separate
    entity) to pull Builds from.

    All build input comes through self.handle_build_request() calls, and all build output
    goes through self.next_prepared_build() calls.
    """
    def __init__(self):
        self._logger = get_logger(__name__)
        self._builds_waiting_for_slaves = Queue()
        self._request_queue = Queue()
        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop,
            name='RequestHandlerLoop',
            daemon=True)
        self._project_preparation_locks = {}

    def start(self):
        """
        Start the infinite loop that will accept unprepared builds and put them through build preparation.
        """
        if self._request_queue_worker_thread.is_alive():
            raise RuntimeError(
                'Error: build request handler loop was asked to start when it is already running.'
            )
        self._request_queue_worker_thread.start()

    def handle_build_request(self, build):
        """
        :param build: the requested build
        :type build: Build
        """
        self._request_queue.put(build)
        analytics.record_event(analytics.BUILD_REQUEST_QUEUED,
                               build_id=build.build_id(),
                               log_msg='Queued request for build {build_id}.')

    def next_prepared_build(self):
        """
        Get the next build that has successfully completed build preparation.

        This is a blocking call--if there are no more builds that have completed build preparation and this
        method gets invoked, the execution will hang until the next build has completed build preparation.

        :rtype: Build
        """
        return self._builds_waiting_for_slaves.get()

    def _build_preparation_loop(self):
        """
        Grabs a build off the request_queue (populated by self.handle_build_request()), prepares it,
        and puts that build onto the self.builds_waiting_for_slaves queue.
        """
        while True:
            build = self._request_queue.get()
            project_id = build.project_type.project_id()

            if project_id not in self._project_preparation_locks:
                self._logger.info('Creating project lock [{}] for build {}',
                                  project_id, str(build.build_id()))
                self._project_preparation_locks[project_id] = Lock()

            project_lock = self._project_preparation_locks[project_id]
            SafeThread(target=self._prepare_build_async,
                       name='Bld{}-PreparationThread'.format(build.build_id()),
                       args=(build, project_lock)).start()

    def _prepare_build_async(self, build, project_lock):
        """
        :type build: Build
        :type project_lock: Lock
        """
        self._logger.info('Build {} is waiting for the project lock',
                          build.build_id())

        with project_lock:
            self._logger.info('Build {} has acquired project lock',
                              build.build_id())
            analytics.record_event(
                analytics.BUILD_PREPARE_START,
                build_id=build.build_id(),
                log_msg='Build preparation loop is handling request for build {build_id}.')
            try:
                self._prepare_build(build)
                if not build.has_error:
                    analytics.record_event(
                        analytics.BUILD_PREPARE_FINISH,
                        build_id=build.build_id(),
                        log_msg='Build {build_id} successfully prepared and waiting for slaves.')
                    self._builds_waiting_for_slaves.put(build)
            except Exception as ex:  # pylint: disable=broad-except
                build.mark_failed(str(ex))
                self._logger.exception(
                    'Could not handle build request for build {}.'.format(
                        build.build_id()))

    def _prepare_build(self, build):
        """
        Prepare a Build to be distributed across slaves.

        :param build: the Build instance to be prepared to be distributed across slaves
        :type build: Build
        """
        build_id = build.build_id()
        build_request = build.build_request

        if not isinstance(build_request, BuildRequest):
            raise RuntimeError(
                'Build {} has no associated request object.'.format(build_id))

        project_type = build.project_type
        if not isinstance(project_type, ProjectType):
            raise RuntimeError('Build {} has no project set.'.format(build_id))

        self._logger.info('Fetching project for build {}.', build_id)
        project_type.fetch_project()

        self._logger.info('Successfully fetched project for build {}.',
                          build_id)
        job_config = project_type.job_config()

        if job_config is None:
            build.mark_failed(
                'Build failed while trying to parse cluster_runner.yaml.')
            return

        subjobs = self._compute_subjobs_for_build(build_id, job_config,
                                                  project_type)
        build.prepare(subjobs, job_config)

    def _compute_subjobs_for_build(self, build_id, job_config, project_type):
        """
        :type build_id: int
        :type job_config: JobConfig
        :param project_type: the docker, directory, or git repo project_type that this build is running in
        :type project_type: project_type.project_type.ProjectType
        :rtype: list[Subjob]
        """
        atoms_list = job_config.atomizer.atomize_in_project(project_type)

        # Group the atoms together using some grouping strategy
        timing_file_path = project_type.timing_file_path(job_config.name)
        grouped_atoms = self._grouped_atoms(atoms_list,
                                            job_config.max_executors,
                                            timing_file_path,
                                            project_type.project_directory)

        # Generate subjobs for each group of atoms
        subjobs = []
        for subjob_id, atoms in enumerate(grouped_atoms):
            subjobs.append(
                Subjob(build_id, subjob_id, project_type, job_config, atoms))
        return subjobs

    def _grouped_atoms(self, atoms, max_executors, timing_file_path,
                       project_directory):
        """
        Return atoms that are grouped for optimal CI performance.

        If a timing file exists, then use the TimeBasedAtomGrouper.
        If not, use the default AtomGrouper (groups each atom into its own subjob).

        :param atoms: all of the atoms to be run this time
        :type atoms: list[app.master.atom.Atom]
        :param max_executors: the maximum number of executors for this build
        :type max_executors: int
        :param timing_file_path: path to where the timing data file would be stored (if it exists) for this job
        :type timing_file_path: str
        :type project_directory: str
        :return: the grouped atoms (in the form of a list of lists of atoms)
        :rtype: list[list[app.master.atom.Atom]]
        """
        atom_time_map = None

        if os.path.isfile(timing_file_path):
            with open(timing_file_path, 'r') as json_file:
                try:
                    atom_time_map = json.load(json_file)
                except ValueError:
                    self._logger.warning(
                        'Failed to load timing data from existing file {}',
                        timing_file_path)

        if atom_time_map is not None and len(atom_time_map) > 0:
            atom_grouper = TimeBasedAtomGrouper(atoms, max_executors,
                                                atom_time_map,
                                                project_directory)
        else:
            atom_grouper = AtomGrouper(atoms, max_executors)

        return atom_grouper.groupings()
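
The timing file consumed by _grouped_atoms is only constrained by the code above to be a JSON value that supports len(). A plausible shape, assuming a flat mapping from atom identifier to measured runtime in seconds (the exact schema is an assumption, not something the snippet confirms):

import json

# Hypothetical timing data for three atoms.
atom_time_map = json.loads('{"ATOM_1": 12.5, "ATOM_2": 0.8, "ATOM_3": 45.0}')

# Mirrors the grouper-selection logic in _grouped_atoms():
if atom_time_map is not None and len(atom_time_map) > 0:
    print('TimeBasedAtomGrouper would be chosen')
else:
    print('default AtomGrouper would be chosen')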
Example 6
class SlaveAllocator(object):
    """
    The SlaveAllocator class is responsible for allocating slaves to prepared builds.
    """
    def __init__(self, build_request_handler):
        """
        :type build_request_handler: BuildRequestHandler
        """
        self._logger = get_logger(__name__)
        self._build_request_handler = build_request_handler
        self._idle_slaves = OrderedSetQueue()
        self._allocation_thread = SafeThread(
            target=self._slave_allocation_loop,
            name='SlaveAllocationLoop',
            daemon=True)

    def start(self):
        """
        Start the infinite loop that will pull prepared builds off a synchronized queue
        and allocate slaves to them.
        """
        if self._allocation_thread.is_alive():
            raise RuntimeError(
                'Error: slave allocation loop was asked to start when it is already running.'
            )
        self._allocation_thread.start()

    def _slave_allocation_loop(self):
        """
        Builds wait in line for more slaves. This method executes in the background on another thread and
        watches for idle slaves, then gives them out to the waiting builds.
        """
        while True:
            # This is a blocking call that will block until there is a prepared build.
            build_waiting_for_slave = self._build_request_handler.next_prepared_build()

            while build_waiting_for_slave.needs_more_slaves():
                claimed_slave = self._idle_slaves.get()

                # Remove dead and shutdown slaves from the idle queue
                if claimed_slave.is_shutdown() or not claimed_slave.is_alive(use_cached=False):
                    continue

                # The build may have completed while we were waiting for an idle slave, so check one more time.
                if build_waiting_for_slave.needs_more_slaves():
                    # Potential race condition here!  If the build completes after the if statement is checked,
                    # a slave will be allocated needlessly (and run slave.setup(), which can be significant work).
                    self._logger.info('Allocating slave {} to build {}.',
                                      claimed_slave.url,
                                      build_waiting_for_slave.build_id())
                    build_waiting_for_slave.allocate_slave(claimed_slave)
                else:
                    self.add_idle_slave(claimed_slave)

            self._logger.info('Done allocating slaves for build {}.',
                              build_waiting_for_slave.build_id())

    def add_idle_slave(self, slave):
        """
        Add a slave to the idle queue.

        :type slave: Slave
        """
        try:
            slave.mark_as_idle()
            self._idle_slaves.put(slave)
        except SlaveMarkedForShutdownError:
            pass
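
The idle pool is an OrderedSetQueue rather than a plain Queue, presumably so that re-adding a slave that is already idle does not queue it twice. A minimal sketch of those semantics, offered as an illustration rather than the project's actual implementation:

import queue
from collections import OrderedDict

class OrderedSetQueueSketch(queue.Queue):
    """FIFO queue in which putting an item that is already queued is a no-op (sketch)."""

    def _init(self, maxsize):
        self.queue = OrderedDict()  # keys give set semantics, insertion order gives FIFO

    def _put(self, item):
        self.queue[item] = None     # an existing key keeps its original position

    def _get(self):
        return self.queue.popitem(last=False)[0]  # pop the oldest item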
class BuildRequestHandler(object):
    """
    The BuildRequestHandler class is responsible for preparing a non-prepared build.

    Implementation notes:

    This class manages two critical Queues in ClusterRunner: request_queue and builds_waiting_for_slaves.

    The request_queue is the queue of non-prepared Build instances that the BuildRequestHandler has
    yet to prepare. This queue is populated by the ClusterMaster instance.

    The builds_waiting_for_slaves queue is the queue of prepared Build instances that the
    BuildRequestHandler has completed build preparation for, and is waiting for the SlaveAllocator (a separate
    entity) to pull Builds from.

    All build input comes through self.handle_build_request() calls, and all build output
    goes through self.next_prepared_build_scheduler() calls.
    """
    def __init__(self, scheduler_pool):
        """
        :type scheduler_pool: BuildSchedulerPool
        """
        self._logger = get_logger(__name__)
        self._scheduler_pool = scheduler_pool
        self._builds_waiting_for_slaves = Queue()
        self._request_queue = Queue()
        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop,
            name='RequestHandlerLoop',
            daemon=True)
        self._project_preparation_locks = {}
        self._subjob_calculator = SubjobCalculator()

    def start(self):
        """
        Start the infinite loop that will accept unprepared builds and put them through build preparation.
        """
        if self._request_queue_worker_thread.is_alive():
            raise RuntimeError(
                'Error: build request handler loop was asked to start when it is already running.'
            )
        self._request_queue_worker_thread.start()

    def handle_build_request(self, build):
        """
        :param build: the requested build
        :type build: Build
        """
        self._request_queue.put(build)
        analytics.record_event(analytics.BUILD_REQUEST_QUEUED,
                               build_id=build.build_id(),
                               log_msg='Queued request for build {build_id}.')

    def next_prepared_build_scheduler(self):
        """
        Get the scheduler for the next build that has successfully completed build preparation.

        This is a blocking call--if there are no more builds that have completed build preparation and this
        method gets invoked, the execution will hang until the next build has completed build preparation.

        :rtype: BuildScheduler
        """
        build = self._builds_waiting_for_slaves.get()
        build_scheduler = self._scheduler_pool.get(build)
        return build_scheduler

    def _build_preparation_loop(self):
        """
        Grabs a build off the request_queue (populated by self.handle_build_request()), prepares it,
        and puts that build onto the self.builds_waiting_for_slaves queue.
        """
        while True:
            build = self._request_queue.get()
            project_id = build.project_type.project_id()

            if project_id not in self._project_preparation_locks:
                self._logger.info('Creating project lock [{}] for build {}',
                                  project_id, str(build.build_id()))
                self._project_preparation_locks[project_id] = Lock()

            project_lock = self._project_preparation_locks[project_id]
            SafeThread(target=self._prepare_build_async,
                       name='Bld{}-PreparationThread'.format(build.build_id()),
                       args=(build, project_lock)).start()

    def _prepare_build_async(self, build, project_lock):
        """
        :type build: Build
        :type project_lock: Lock
        """
        self._logger.info('Build {} is waiting for the project lock',
                          build.build_id())

        with project_lock:
            self._logger.info('Build {} has acquired project lock',
                              build.build_id())
            analytics.record_event(
                analytics.BUILD_PREPARE_START,
                build_id=build.build_id(),
                log_msg='Build preparation loop is handling request for build {build_id}.')
            try:
                build.prepare(self._subjob_calculator)
                if not build.has_error:
                    analytics.record_event(
                        analytics.BUILD_PREPARE_FINISH,
                        build_id=build.build_id(),
                        is_success=True,
                        log_msg='Build {build_id} successfully prepared and waiting for slaves.')
                    self._builds_waiting_for_slaves.put(build)

            except Exception as ex:  # pylint: disable=broad-except
                build.mark_failed(str(ex))
                self._logger.exception(
                    'Could not handle build request for build {}.'.format(
                        build.build_id()))
                analytics.record_event(analytics.BUILD_PREPARE_FINISH,
                                       build_id=build.build_id(),
                                       is_success=False)
class BuildRequestHandler(object):
    """
    The BuildRequestHandler class is responsible for preparing a non-prepared build.

    Implementation notes:

    This class manages two critical Queues in ClusterRunner: request_queue and builds_waiting_for_slaves.

    The request_queue is the queue of non-prepared Build instances that the BuildRequestHandler has
    yet to prepare. This queue is populated by the ClusterMaster instance.

    The builds_waiting_for_slaves queue is the queue of prepared Build instances that the
    BuildRequestHandler has completed build preparation for, and is waiting for the SlaveAllocator (a separate
    entity) to pull Builds from.

    All build input comes through self.handle_build_request() calls, and all build output
    goes through self.next_prepared_build_scheduler() calls.
    """
    def __init__(self, scheduler_pool):
        """
        :type scheduler_pool: BuildSchedulerPool
        """
        self._logger = get_logger(__name__)
        self._scheduler_pool = scheduler_pool
        self._builds_waiting_for_slaves = Queue()
        self._request_queue = Queue()
        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
        self._project_preparation_locks = {}
        self._subjob_calculator = SubjobCalculator()

    def start(self):
        """
        Start the infinite loop that will accept unprepared builds and put them through build preparation.
        """
        if self._request_queue_worker_thread.is_alive():
            raise RuntimeError('Error: build request handler loop was asked to start when it is already running.')
        self._request_queue_worker_thread.start()

    def handle_build_request(self, build):
        """
        :param build: the requested build
        :type build: Build
        """
        self._request_queue.put(build)
        analytics.record_event(analytics.BUILD_REQUEST_QUEUED, build_id=build.build_id(),
                               log_msg='Queued request for build {build_id}.')

    def next_prepared_build_scheduler(self):
        """
        Get the scheduler for the next build that has successfully completed build preparation.

        This is a blocking call--if there are no more builds that have completed build preparation and this
        method gets invoked, the execution will hang until the next build has completed build preparation.

        :rtype: BuildScheduler
        """
        build = self._builds_waiting_for_slaves.get()
        build_scheduler = self._scheduler_pool.get(build)
        return build_scheduler

    def _build_preparation_loop(self):
        """
        Grabs a build off the request_queue (populated by self.handle_build_request()), prepares it,
        and puts that build onto the self.builds_waiting_for_slaves queue.
        """
        while True:
            build = self._request_queue.get()
            project_id = build.project_type.project_id()

            if project_id not in self._project_preparation_locks:
                self._logger.info('Creating project lock [{}] for build {}', project_id, str(build.build_id()))
                self._project_preparation_locks[project_id] = Lock()

            project_lock = self._project_preparation_locks[project_id]
            SafeThread(
                target=self._prepare_build_async,
                name='Bld{}-PreparationThread'.format(build.build_id()),
                args=(build, project_lock)
            ).start()

    def _prepare_build_async(self, build, project_lock):
        """
        :type build: Build
        :type project_lock: Lock
        """
        self._logger.info('Build {} is waiting for the project lock', build.build_id())

        with project_lock:
            self._logger.info('Build {} has acquired project lock', build.build_id())
            analytics.record_event(analytics.BUILD_PREPARE_START, build_id=build.build_id(),
                                   log_msg='Build preparation loop is handling request for build {build_id}.')
            try:
                build.prepare(self._subjob_calculator)
                if not build.has_error:
                    analytics.record_event(analytics.BUILD_PREPARE_FINISH, build_id=build.build_id(), is_success=True,
                                           log_msg='Build {build_id} successfully prepared.')
                    # If the atomizer found no work to do, perform build cleanup and skip the slave allocation.
                    if len(build.all_subjobs()) == 0:
                        self._logger.info('Build {} has no work to perform and is exiting.', build.build_id())
                        build.finish()
                    # If there is work to be done, this build must queue to be allocated slaves.
                    else:
                        self._logger.info('Build {} is waiting for slaves.', build.build_id())
                        self._builds_waiting_for_slaves.put(build)

            except Exception as ex:  # pylint: disable=broad-except
                build.mark_failed(str(ex))  # WIP(joey): Build should do this internally.
                self._logger.exception('Could not handle build request for build {}.'.format(build.build_id()))
                analytics.record_event(analytics.BUILD_PREPARE_FINISH, build_id=build.build_id(), is_success=False)
Example 9
class BuildRequestHandler(object):
    """
    The BuildRequestHandler class is responsible for preparing a non-prepared build.

    Implementation notes:

    This class manages two critical Queues in ClusterRunner: request_queue and builds_waiting_for_slaves.

    The request_queue is the queue of non-prepared Build instances that the BuildRequestHandler has
    yet to prepare. This queue is populated by the ClusterMaster instance.

    The builds_waiting_for_slaves queue is the queue of prepared Build instances that the
    BuildRequestHandler has completed build preparation for, and is waiting for the SlaveAllocator (a separate
    entity) to pull Builds from.

    All build input comes through self.handle_build_request() calls, and all build output
    goes through self._scheduler_pool.next_prepared_build_scheduler() calls.
    """
    def __init__(self, scheduler_pool):
        """
        :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
        """
        self._logger = get_logger(__name__)
        self._scheduler_pool = scheduler_pool
        self._request_queue = Queue()
        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop,
            name='RequestHandlerLoop',
            daemon=True)
        self._project_preparation_locks = {}

    def start(self):
        """
        Start the infinite loop that will accept unprepared builds and put them through build preparation.
        """
        if self._request_queue_worker_thread.is_alive():
            raise RuntimeError(
                'Error: build request handler loop was asked to start when it is already running.'
            )
        self._request_queue_worker_thread.start()

    def handle_build_request(self, build):
        """
        :param build: the requested build
        :type build: Build
        """
        self._request_queue.put(build)
        analytics.record_event(analytics.BUILD_REQUEST_QUEUED,
                               build_id=build.build_id(),
                               log_msg='Queued request for build {build_id}.')

    def _build_preparation_loop(self):
        """
        Grabs a build off the request_queue (populated by self.handle_build_request()), prepares it,
        and hands it off to the scheduler pool once it is waiting for slaves.
        """
        while True:
            build = self._request_queue.get()
            project_id = build.project_type.project_id()

            if project_id not in self._project_preparation_locks:
                self._logger.info('Creating project lock [{}] for build {}',
                                  project_id, str(build.build_id()))
                self._project_preparation_locks[project_id] = Lock()

            project_lock = self._project_preparation_locks[project_id]
            SafeThread(target=self._prepare_build_async,
                       name='Bld{}-PreparationThread'.format(build.build_id()),
                       args=(build, project_lock)).start()

    def _prepare_build_async(self, build, project_lock):
        """
        :type build: app.master.build.Build
        :type project_lock: Lock
        """
        self._logger.info('Build {} is waiting for the project lock',
                          build.build_id())

        with project_lock:
            self._logger.info('Build {} has acquired project lock',
                              build.build_id())
            analytics.record_event(
                analytics.BUILD_PREPARE_START,
                build_id=build.build_id(),
                log_msg='Build preparation loop is handling request for build {build_id}.')
            try:
                build.prepare()
                if not build.is_stopped:
                    analytics.record_event(
                        analytics.BUILD_PREPARE_FINISH,
                        build_id=build.build_id(),
                        is_success=True,
                        log_msg='Build {build_id} successfully prepared.')
                    # If the atomizer found no work to do, perform build cleanup and skip the slave allocation.
                    if len(build.get_subjobs()) == 0:
                        self._logger.info(
                            'Build {} has no work to perform and is exiting.',
                            build.build_id())
                        build.finish()
                    # If there is work to be done, this build must queue to be allocated slaves.
                    else:
                        self._logger.info('Build {} is waiting for slaves.',
                                          build.build_id())
                        self._scheduler_pool.add_build_waiting_for_slaves(build)

            except Exception as ex:  # pylint: disable=broad-except
                if not build.is_canceled:
                    build.mark_failed(str(ex))  # WIP(joey): Build should do this internally.
                    self._logger.exception(
                        'Could not handle build request for build {}.'.format(
                            build.build_id()))
                analytics.record_event(analytics.BUILD_PREPARE_FINISH,
                                       build_id=build.build_id(),
                                       is_success=False)
class BuildRequestHandler(object):
    """
    The BuildRequestHandler class is responsible for preparing a non-prepared build.

    Implementation notes:

    This class manages two critical Queues in ClusterRunner: request_queue and builds_waiting_for_slaves.

    The request_queue is the queue of non-prepared Build instances that the BuildRequestHandler has
    yet to prepare. This queue is populated by the ClusterMaster instance.

    The builds_waiting_for_slaves queue is the queue of prepared Build instances that the
    BuildRequestHandler has completed build preparation for, and is waiting for the SlaveAllocator (a separate
    entity) to pull Builds from.

    All build input comes through self.handle_build_request() calls, and all build output
    goes through self.next_prepared_build() calls.
    """

    def __init__(self):
        self._logger = get_logger(__name__)
        self._builds_waiting_for_slaves = Queue()
        self._request_queue = Queue()
        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
        self._project_preparation_locks = {}

    def start(self):
        """
        Start the infinite loop that will accept unprepared builds and put them through build preparation.
        """
        if self._request_queue_worker_thread.is_alive():
            raise RuntimeError('Error: build request handler loop was asked to start when it is already running.')
        self._request_queue_worker_thread.start()

    def handle_build_request(self, build):
        """
        :param build: the requested build
        :type build: Build
        """
        self._request_queue.put(build)
        analytics.record_event(analytics.BUILD_REQUEST_QUEUED, build_id=build.build_id(),
                               log_msg='Queued request for build {build_id}.')

    def next_prepared_build(self):
        """
        Get the next build that has successfully completed build preparation.

        This is a blocking call--if there are no more builds that have completed build preparation and this
        method gets invoked, the execution will hang until the next build has completed build preparation.

        :rtype: Build
        """
        return self._builds_waiting_for_slaves.get()

    def _build_preparation_loop(self):
        """
        Grabs a build off the request_queue (populated by self.handle_build_request()), prepares it,
        and puts that build onto the self.builds_waiting_for_slaves queue.
        """
        while True:
            build = self._request_queue.get()
            project_id = build.project_type.project_id()

            if project_id not in self._project_preparation_locks:
                self._logger.info('Creating project lock [{}] for build {}', project_id, str(build.build_id()))
                self._project_preparation_locks[project_id] = Lock()

            project_lock = self._project_preparation_locks[project_id]
            SafeThread(
                target=self._prepare_build_async,
                name='Bld{}-PreparationThread'.format(build.build_id()),
                args=(build, project_lock)
            ).start()

    def _prepare_build_async(self, build, project_lock):
        """
        :type build: Build
        :type project_lock: Lock
        """
        self._logger.info('Build {} is waiting for the project lock', build.build_id())

        with project_lock:
            self._logger.info('Build {} has acquired project lock', build.build_id())
            analytics.record_event(analytics.BUILD_PREPARE_START, build_id=build.build_id(),
                                   log_msg='Build preparation loop is handling request for build {build_id}.')
            try:
                self._prepare_build(build)
                if not build.has_error:
                    analytics.record_event(analytics.BUILD_PREPARE_FINISH, build_id=build.build_id(),
                                           log_msg='Build {build_id} successfully prepared and waiting for slaves.')
                    self._builds_waiting_for_slaves.put(build)
            except Exception as ex:  # pylint: disable=broad-except
                build.mark_failed(str(ex))
                self._logger.exception('Could not handle build request for build {}.'.format(build.build_id()))

    def _prepare_build(self, build):
        """
        Prepare a Build to be distributed across slaves.

        :param build: the Build instance to be prepared to be distributed across slaves
        :type build: Build
        """
        build_id = build.build_id()
        build_request = build.build_request

        if not isinstance(build_request, BuildRequest):
            raise RuntimeError('Build {} has no associated request object.'.format(build_id))

        project_type = build.project_type
        if not isinstance(project_type, ProjectType):
            raise RuntimeError('Build {} has no project set.'.format(build_id))

        self._logger.info('Fetching project for build {}.', build_id)
        project_type.fetch_project()

        self._logger.info('Successfully fetched project for build {}.', build_id)
        job_config = project_type.job_config()

        if job_config is None:
            build.mark_failed('Build failed while trying to parse cluster_runner.yaml.')
            return

        subjobs = self._compute_subjobs_for_build(build_id, job_config, project_type)
        build.prepare(subjobs, job_config)

    def _compute_subjobs_for_build(self, build_id, job_config, project_type):
        """
        :type build_id: int
        :type job_config: JobConfig
        :param project_type: the directory or git repo project_type that this build is running in
        :type project_type: project_type.project_type.ProjectType
        :rtype: list[Subjob]
        """
        atoms_list = job_config.atomizer.atomize_in_project(project_type)

        # Group the atoms together using some grouping strategy
        timing_file_path = project_type.timing_file_path(job_config.name)
        grouped_atoms = self._grouped_atoms(
            atoms_list,
            job_config.max_executors,
            timing_file_path,
            project_type.project_directory
        )

        # Generate subjobs for each group of atoms
        subjobs = []
        for subjob_id, atoms in enumerate(grouped_atoms):
            subjobs.append(Subjob(build_id, subjob_id, project_type, job_config, atoms))
        return subjobs

    def _grouped_atoms(self, atoms, max_executors, timing_file_path, project_directory):
        """
        Return atoms that are grouped for optimal CI performance.

        If a timing file exists, then use the TimeBasedAtomGrouper.
        If not, use the default AtomGrouper (groups each atom into its own subjob).

        :param atoms: all of the atoms to be run this time
        :type atoms: list[app.master.atom.Atom]
        :param max_executors: the maximum number of executors for this build
        :type max_executors: int
        :param timing_file_path: path to where the timing data file would be stored (if it exists) for this job
        :type timing_file_path: str
        :type project_directory: str
        :return: the grouped atoms (in the form of a list of lists of atoms)
        :rtype: list[list[app.master.atom.Atom]]
        """
        atom_time_map = None

        if os.path.isfile(timing_file_path):
            with open(timing_file_path, 'r') as json_file:
                try:
                    atom_time_map = json.load(json_file)
                except ValueError:
                    self._logger.warning('Failed to load timing data from existing file {}', timing_file_path)

        if atom_time_map is not None and len(atom_time_map) > 0:
            atom_grouper = TimeBasedAtomGrouper(atoms, max_executors, atom_time_map, project_directory)
        else:
            atom_grouper = AtomGrouper(atoms, max_executors)

        return atom_grouper.groupings()
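
The hand-off between _prepare_build_async() (producer) and next_prepared_build() (consumer) is the standard blocking producer/consumer pattern on queue.Queue. A toy demonstration of the blocking get() described in the docstring above; the 'build-1' string and the sleep are illustrative stand-ins for real builds and preparation work.

import queue
import threading
import time

builds_waiting_for_slaves = queue.Queue()

def prepare_build():
    time.sleep(0.1)                           # simulate build preparation work
    builds_waiting_for_slaves.put('build-1')  # like _builds_waiting_for_slaves.put(build)

threading.Thread(target=prepare_build).start()
print(builds_waiting_for_slaves.get())        # blocks until the put, then prints 'build-1'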