def main(options):
    """Main script.

    @param options: Options to run the script.
    """
    config = logging_config.LoggingConfig()
    if options.logfile:
        config.add_file_handler(
            file_path=os.path.abspath(options.logfile),
            level=logging.DEBUG if options.verbose else logging.INFO)

    bucket = lxc.ContainerBucket()
    logging.info('')
    logging.info('Cleaning container bucket %s', bucket.container_path)
    success_count = 0
    failure_count = 0
    for container in bucket.get_all().values():
        if is_container_orphaned(container):
            if cleanup(container, options):
                success_count += 1
            else:
                failure_count += 1
    if options.execute:
        key = 'container_cleanup.%s' % socket.gethostname().replace('.', '_')
        autotest_stats.Gauge(key).send('success', success_count)
        autotest_stats.Gauge(key).send('failure', failure_count)
    logging.info('Cleanup finished.')
Example No. 2
    def kill_processes(self, process_list):
        """Send signals escalating in severity to the processes in process_list.

        @param process_list: A list of drone_manager.Process objects
                             representing the processes to kill.
        """
        kill_proc_key = 'kill_processes'
        autotest_stats.Gauge(_STATS_KEY).send('%s.%s' % (kill_proc_key, 'net'),
                                              len(process_list))
        try:
            logging.info('List of processes to be killed: %s', process_list)
            processes_to_kill = {}
            for p in process_list:
                signal_queue = self.get_signal_queue_to_kill(p)
                processes_to_kill[signal_queue] = (
                    processes_to_kill.get(signal_queue, []) + [p])
            sig_counts = {}
            for signal_queue, processes in processes_to_kill.iteritems():
                sig_counts.update(
                    utils.nuke_pids([-process.pid for process in processes],
                                    signal_queue=signal_queue))
            for name, count in sig_counts.iteritems():
                autotest_stats.Gauge(_STATS_KEY).send(
                    '%s.%s' % (kill_proc_key, name), count)
        except error.AutoservRunError as e:
            self._warn('Error occurred when killing processes. Error: %s' % e)
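
A minimal sketch of the grouping step above, assuming get_signal_queue_to_kill returns an escalating tuple of signals; the Process stand-in and the queues shown here are hypothetical, not the real drone_manager types.

import collections
import signal

Process = collections.namedtuple('Process', ['hostname', 'pid'])

# Hypothetical escalation queues; the real ones come from
# get_signal_queue_to_kill() above.
slow_queue = (signal.SIGTERM, signal.SIGKILL)
fast_queue = (signal.SIGKILL,)

process_list = [Process('drone1', 123), Process('drone1', 456),
                Process('drone2', 789)]
queues = [slow_queue, slow_queue, fast_queue]

processes_to_kill = {}
for p, q in zip(process_list, queues):
    processes_to_kill[q] = processes_to_kill.get(q, []) + [p]
# processes_to_kill now maps each signal queue to the processes that should
# receive that escalation; nuke_pids() is then called once per queue with the
# negated pids so each whole process group receives the signals.
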
    def do_heartbeat(self):
        """Perform a heartbeat: Retreive new jobs.

        This function executes a `shard_heartbeat` RPC. It retrieves the
        response of this call and processes the response by storing the returned
        objects in the local database.
        """
        logging.info("Performing heartbeat.")
        packet = self._heartbeat_packet()
        autotest_stats.Gauge(STATS_KEY).send('heartbeat.request_size',
                                             len(str(packet)))

        try:
            response = self.afe.run(HEARTBEAT_AFE_ENDPOINT, **packet)
        except urllib2.HTTPError as e:
            self._heartbeat_failure("HTTPError %d: %s" % (e.code, e.reason))
            return
        except urllib2.URLError as e:
            self._heartbeat_failure("URLError: %s" % e.reason)
            return
        except httplib.HTTPException as e:
            self._heartbeat_failure("HTTPException: %s" % e)
            return
        except timeout_util.TimeoutError as e:
            self._heartbeat_failure("TimeoutError: %s" % e)
            return

        autotest_stats.Gauge(STATS_KEY).send('heartbeat.response_size',
                                             len(str(response)))
        self._mark_jobs_as_uploaded([job['id'] for job in packet['jobs']])
        self.process_heartbeat_response(response)
        logging.info("Heartbeat completed.")
Example No. 4
def _run():
    """Report metadata in the queue until being aborted.
    """
    # Time when uploads first started failing. None if the last upload succeeded.
    first_failed_upload = None
    # True if an email alert was sent because uploads have been failing
    # continuously for _MAX_UPLOAD_FAIL_DURATION seconds.
    email_alert = False
    upload_size = _MIN_RETRY_ENTRIES
    try:
        while True:
            start_time = time.time()
            data_list = []
            if (first_failed_upload and time.time() - first_failed_upload >
                    _MAX_UPLOAD_FAIL_DURATION):
                upload_size = _MIN_RETRY_ENTRIES
                if not email_alert:
                    _email_alert()
                    email_alert = True
            else:
                upload_size = min(upload_size * 2, _MAX_UPLOAD_SIZE)
            while (not metadata_queue.empty()
                   and len(data_list) < upload_size):
                data_list.append(metadata_queue.get_nowait())
            if data_list:
                if autotest_es.bulk_post(data_list=data_list):
                    time_used = time.time() - start_time
                    logging.info(
                        '%d entries of metadata uploaded in %s '
                        'seconds.', len(data_list), time_used)
                    autotest_stats.Timer('metadata_reporter').send(
                        'time_used', time_used)
                    autotest_stats.Gauge('metadata_reporter').send(
                        'entries_uploaded', len(data_list))
                    first_failed_upload = None
                    email_alert = False
                else:
                    logging.warn(
                        'Failed to upload %d entries of metadata, '
                        'they will be retried later.', len(data_list))
                    autotest_stats.Gauge('metadata_reporter').send(
                        'entries_failed', len(data_list))
                    for data in data_list:
                        queue(data)
                    if not first_failed_upload:
                        first_failed_upload = time.time()
            sleep_time = _REPORT_INTERVAL_SECONDS - time.time() + start_time
            if sleep_time < 0:
                sleep_time = 0.5
            _abort.wait(timeout=sleep_time)
    except Exception as e:
        logging.error('Metadata reporter thread failed with error: %s', e)
        raise
    finally:
        logging.info('Metadata reporting thread is exiting.')
        _abort.clear()
        _report_lock.release()
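
For a concrete picture of the batching above (the constants here are made up; the real _MIN_RETRY_ENTRIES and _MAX_UPLOAD_SIZE are defined elsewhere in the module): the batch size doubles on every healthy cycle until it hits the cap, and snaps back to the minimum once uploads have been failing for more than _MAX_UPLOAD_FAIL_DURATION seconds.

# Hypothetical constants, for illustration only.
_MIN_RETRY_ENTRIES = 10
_MAX_UPLOAD_SIZE = 1000

upload_size = _MIN_RETRY_ENTRIES
growth = []
for _ in range(8):
    upload_size = min(upload_size * 2, _MAX_UPLOAD_SIZE)
    growth.append(upload_size)
# growth == [20, 40, 80, 160, 320, 640, 1000, 1000]
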
    def process_heartbeat_response(self, heartbeat_response):
        """Save objects returned by a heartbeat to the local database.

        This deserializes hosts and jobs including their dependencies and saves
        them to the local database.

        @param heartbeat_response: A dictionary with keys 'hosts' and 'jobs',
                                   as returned by the `shard_heartbeat` rpc
                                   call.
        """
        hosts_serialized = heartbeat_response['hosts']
        jobs_serialized = heartbeat_response['jobs']
        suite_keyvals_serialized = heartbeat_response['suite_keyvals']

        autotest_stats.Gauge(STATS_KEY).send('hosts_received',
                                             len(hosts_serialized))
        autotest_stats.Gauge(STATS_KEY).send('jobs_received',
                                             len(jobs_serialized))
        autotest_stats.Gauge(STATS_KEY).send('suite_keyvals_received',
                                             len(suite_keyvals_serialized))

        self._deserialize_many(hosts_serialized, models.Host, 'host')
        self._deserialize_many(jobs_serialized, models.Job, 'job')
        self._deserialize_many(suite_keyvals_serialized, models.JobKeyval,
                               'jobkeyval')

        host_ids = [h['id'] for h in hosts_serialized]
        logging.info('Heartbeat response contains hosts %s', host_ids)
        job_ids = [j['id'] for j in jobs_serialized]
        logging.info('Heartbeat response contains jobs %s', job_ids)
        parent_jobs_with_keyval = set(
            [kv['job_id'] for kv in suite_keyvals_serialized])
        logging.info('Heartbeat response contains suite_keyvals for jobs %s',
                     list(parent_jobs_with_keyval))

        # If the master has just sent any jobs that we think have completed,
        # re-sync them with the master. This is especially useful when a
        # heartbeat or job is silently dropped, as the next heartbeat will
        # have a disagreement. Updating the shard_id to NULL will mark these
        # jobs for upload on the next heartbeat.
        job_models = models.Job.objects.filter(id__in=job_ids,
                                               hostqueueentry__complete=True)
        if job_models:
            job_models.update(shard=None)
            job_ids_repr = ', '.join([str(job.id) for job in job_models])
            logging.warn(
                'The following completed jobs have shard_id reset to NULL '
                'so they will be uploaded to the master again: %s',
                job_ids_repr)
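
For reference, a hypothetical, heavily trimmed shard_heartbeat response shaped like the fields process_heartbeat_response() reads above; real responses carry fully serialized Host, Job and JobKeyval dictionaries, not just ids.

example_response = {
    'hosts': [{'id': 101}, {'id': 102}],    # deserialized into models.Host
    'jobs': [{'id': 5001}, {'id': 5002}],   # deserialized into models.Job
    'suite_keyvals': [{'job_id': 5000}],    # deserialized into models.JobKeyval
}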
Example No. 6
def main():
    """Counts the number of AFE jobs in the last day, then pushes the count to statsd"""
    parser = argparse.ArgumentParser(description=(
        'A script which records the number of afe jobs run in a time interval.'
    ))
    parser.parse_args(sys.argv[1:])
    count = number_of_jobs_since(timedelta(days=1))
    autotest_stats.Gauge('jobs_rate').send('afe_daily_count', count)
Example No. 7
    def _execute_calls(self, calls):
        autotest_stats.Gauge('drone_execute_call_count').send(
            self.hostname.replace('.', '_'), len(calls))
        return_message = self._execute_calls_impl(calls)
        for warning in return_message['warnings']:
            subject = 'Warning from drone %s' % self.hostname
            logging.warning(subject + '\n' + warning)
            email_manager.manager.enqueue_notify_email(subject, warning)
        return return_message['results']
Example No. 8
def time_wait(server):
    """
    Submits a stat for the number of TIME_WAIT sockets that are on the server.

    @param server: The AFE server.
    """
    out = infra.execute_command(server, 'ss -o state time-wait | wc -l')
    stat = autotest_stats.Gauge(server, bare=True)
    # ss prints out a header for the columns also, so we subtract one to report
    # about only the data.
    stat.send('time_wait_sockets', int(out.strip()) - 1)
Example No. 9
def num_devserver_processes(server):
    """
    Submits a stat for the number of devserver processes that are on the server.

    @param server: The AFE server.
    """
    out = infra.execute_command(server, 'ps -C devserver.py| wc -l')
    stat = autotest_stats.Gauge(server, bare=True)
    # ps prints out a header for the columns also, so we subtract one to report
    # about only the data.
    stat.send('num_devserver_processes', int(out.strip()) - 1)
    def _schedule_jobs(self):
        """Schedule new jobs against hosts."""

        key = 'host_scheduler.jobs_per_tick'
        new_jobs_with_hosts = 0
        queue_entries = self.job_query_manager.get_pending_queue_entries(
            only_hostless=False)
        unverified_host_jobs = [
            job for job in queue_entries if not job.is_hostless()
        ]
        if not unverified_host_jobs:
            return
        for acquisition in self.find_hosts_for_jobs(unverified_host_jobs):
            self.schedule_host_job(acquisition.host, acquisition.job)
            self._record_host_assignment(acquisition.host, acquisition.job)
            new_jobs_with_hosts += 1
        autotest_stats.Gauge(key).send('new_jobs_with_hosts',
                                       new_jobs_with_hosts)
        autotest_stats.Gauge(key).send(
            'new_jobs_without_hosts',
            len(unverified_host_jobs) - new_jobs_with_hosts)
    def _compute_active_processes(self, drone):
        drone.active_processes = 0
        for pidfile_id, contents in self._pidfiles.iteritems():
            is_running = contents.exit_status is None
            on_this_drone = (contents.process
                             and contents.process.hostname == drone.hostname)
            if is_running and on_this_drone:
                info = self._registered_pidfile_info[pidfile_id]
                if info.num_processes is not None:
                    drone.active_processes += info.num_processes
        autotest_stats.Gauge(self._STATS_KEY).send(
                '%s.%s' % (drone.hostname.replace('.', '_'),
                           'active_processes'), drone.active_processes)
Example No. 12
def main():
    logging_manager.configure_logging(
        drone_logging_config.DroneLoggingConfig())
    with timer.get_client('decode'):
        calls = parse_input()
    args = _parse_args(sys.argv[1:])
    if args.call_time is not None:
        autotest_stats.Gauge(_STATS_KEY).send('invocation_overhead',
                                              time.time() - args.call_time)

    drone_utility = DroneUtility()
    return_value = drone_utility.execute_calls(calls)
    with timer.get_client('encode'):
        return_data(return_value)
def check_proc(prog, max_elapsed_sec):
    """Check the number of long-running processes for a given program.

    Finds out the number of processes for a given program that have run
    more than a given elapsed time.
    Sends out the number to stats dashboard.

    @param prog: Program name.
    @param max_elapsed_sec: Max elapsed time in seconds. Processes that
                            have run more than this value will be caught.
    """
    cmd = ('ps -eo etimes,args | grep "%s" | awk \'{if($1 > %d) print $0}\' | '
           'wc -l' % (prog, max_elapsed_sec))
    count = int(subprocess.check_output(cmd, shell=True))
    autotest_stats.Gauge(STATS_KEY).send(prog, count)
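
A hypothetical invocation (the program name and threshold are examples, not values taken from this script): count gsutil processes that have been running for more than an hour and report that count under STATS_KEY.

check_proc('gsutil', 60 * 60)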
Example No. 14
def rpcs_per_sec(server):
    """
    Scrape the requests/sec number off of the apache server-status page and
    submit it as a stat to statsd.

    @param server: The AFE server.
    """
    try:
        page = requests.get('http://%s/server-status' % server).text
    except requests.ConnectionError as e:
        logging.exception(e)
        return

    m = re.search("(\d+) requests/sec", page)
    if m:
        val = int(m.groups(0)[0])
        stat = autotest_stats.Gauge(server, bare=True)
        stat.send('requests_per_sec', val)
Example No. 15
class RequestAccountant(object):
    """A helper class that count requests and manages min_duts requirement.

    On initialization, this object takes a list of host requests.
    It will batch the requests by grouping similar requests together
    and generate a mapping from unique request-> count of the request.
    It will also generate a mapping from suite_job_id -> min_duts.

    RDB does two rounds of host acquisition. The first round attempts
    to get min_duts for each suite. The second round attempts to satisfy
    the remaining requests.  RDB calls get_min_duts and get_duts to
    figure out how many duts it should attempt to get for a unique
    request in the first and second round respectively.

    Assume we have two distinct requests
          R1 (parent_job_id: 10, need hosts: 2)
          R2 (parent_job_id: 10, need hosts: 4)
    And parent job P (job_id:10) has a min dut requirement of 3. So we get
          requests_to_counts = {R1: 2, R2: 4}
          min_duts_map = {P: 3}

    First round acquiring:
    Call get_min_duts(R1)
          return 2, because P hasn't reached its min dut limit (3) yet
          requests_to_counts -> {R1: 2-2=0, R2: 4}
          min_duts_map -> {P: 3-2=1}

    Call get_min_duts(R2)
         return 1, because although R2 needs 4 duts, P's min dut limit is now 1
          requests_to_counts -> {R1: 0, R2: 4-1=3}
          min_duts_map -> {P: 1-1=0}

    Second round acquiring:
    Call get_duts(R1):
         return 0, requests_to_counts[R1]
    Call get_duts(R2):
         return 3, requests_to_counts[R2]

    Note it is possible that in the first round, although R1 requested
    2 duts, it may only get 1 or none. However, get_duts doesn't need to
    care whether the first round succeeded or not: if the first round
    failed, the request will not be fulfilled regardless of how many
    duts get_duts asks for.
    """

    _gauge = autotest_stats.Gauge(RDB_STATS_KEY)

    def __init__(self, host_requests):
        """Initialize.

        @param host_requests: A list of request to acquire hosts.
        """
        self.requests_to_counts = {}
        # The order matters, it determines which request gets fulfilled first.
        self.requests = []
        for request, count in self._batch_requests(host_requests):
            self.requests.append(request)
            self.requests_to_counts[request] = count
        self.min_duts_map = dict((r.parent_job_id, r.suite_min_duts)
                                 for r in self.requests_to_counts.iterkeys()
                                 if r.parent_job_id)

    @classmethod
    def _batch_requests(cls, requests):
        """ Group similar requests, sort by priority and parent_job_id.

        @param requests: A list of unsorted, unordered requests.

        @return: A list of tuples of the form (request, number of occurrences)
            formed by counting the number of requests with the same acls/deps/
            priority in the input list of requests, and sorting by priority.
            The order of this list ensures against priority inversion.
        """
        sort_function = lambda request: (request[0].priority,
                                         -request[0].parent_job_id)
        return sorted(collections.Counter(requests).items(),
                      key=sort_function,
                      reverse=True)

    def get_min_duts(self, host_request):
        """Given a distinct host request figure out min duts to request for.

        @param host_request: A request.
        @returns: The minimum duts that should be requested.
        """
        parent_id = host_request.parent_job_id
        count = self.requests_to_counts[host_request]
        if parent_id:
            min_duts = self.min_duts_map.get(parent_id, 0)
            to_acquire = min(count, min_duts)
            self.min_duts_map[parent_id] = max(0, min_duts - to_acquire)
        else:
            to_acquire = 0
        self.requests_to_counts[host_request] -= to_acquire
        return to_acquire

    def get_duts(self, host_request):
        """Return the number of duts host_request still need.

        @param host_request: A request.
        @returns: The number of duts that still need to be requested.
        """
        return self.requests_to_counts[host_request]

    @classmethod
    def record_acquire_min_duts(cls, host_request, hosts_required,
                                acquired_host_count):
        """Send stats to graphite about host acquisition.

        @param host_request: A request.
        @param hosts_required: Number of hosts required to satisfy request.
        @param acquired_host_count: Number of hosts acquired.
        """
        try:
            priority = priorities.Priority.get_string(host_request.priority)
        except ValueError:
            return
        key = ('min_duts_acquisition_rate.%s') % priority
        cls._gauge.send(key, acquired_host_count / float(hosts_required))
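
A minimal sketch replaying the docstring's worked example; the namedtuple below is a hypothetical stand-in for the real (hashable) rdb request type, which carries at least priority, parent_job_id and suite_min_duts.

import collections

Request = collections.namedtuple(
    'Request', ['priority', 'parent_job_id', 'suite_min_duts'])

r1 = Request(priority=30, parent_job_id=10, suite_min_duts=3)
r2 = Request(priority=20, parent_job_id=10, suite_min_duts=3)

# Two copies of R1 and four of R2 -> requests_to_counts = {R1: 2, R2: 4}.
accountant = RequestAccountant([r1, r1, r2, r2, r2, r2])

accountant.get_min_duts(r1)   # -> 2; parent job 10 has 3 - 2 = 1 min duts left
accountant.get_min_duts(r2)   # -> 1; parent job 10 now has 0 min duts left
accountant.get_duts(r1)       # -> 0 remaining for R1
accountant.get_duts(r2)       # -> 3 remaining for R2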
Example No. 16
    def offload_dir(dir_entry, dest_path):
        """Offload the specified directory entry to Google storage.

        @param dir_entry: Directory entry to offload.
        @param dest_path: Location in google storage where we will
                          offload the directory.

        """
        try:
            counter = autotest_stats.Counter(STATS_KEY)
            counter.increment('jobs_offload_started')

            sanitize_dir(dir_entry)

            if LIMIT_FILE_COUNT:
                limit_file_count(dir_entry)

            error = False
            stdout_file = tempfile.TemporaryFile('w+')
            stderr_file = tempfile.TemporaryFile('w+')
            process = None
            signal.alarm(OFFLOAD_TIMEOUT_SECS)
            gs_path = '%s%s' % (gs_uri, dest_path)
            process = subprocess.Popen(get_cmd_list(multiprocessing, dir_entry,
                                                    gs_path),
                                       stdout=stdout_file,
                                       stderr=stderr_file)
            process.wait()
            signal.alarm(0)

            if process.returncode == 0:
                dir_size = get_directory_size_kibibytes(dir_entry)

                counter.increment('kibibytes_transferred_total', dir_size)
                metadata = {
                    '_type': METADATA_TYPE,
                    'size_KB': dir_size,
                    'result_dir': dir_entry,
                    'drone': socket.gethostname().replace('.', '_')
                }
                autotest_stats.Gauge(STATS_KEY, metadata=metadata).send(
                    'kibibytes_transferred', dir_size)
                counter.increment('jobs_offloaded')
                shutil.rmtree(dir_entry)
            else:
                error = True
        except TimeoutException:
            # If we finished the call to Popen(), we may need to
            # terminate the child process.  We don't bother calling
            # process.poll(); that inherently races because the child
            # can die any time it wants.
            if process:
                try:
                    process.terminate()
                except OSError:
                    # We don't expect any error other than "No such
                    # process".
                    pass
            logging.error(
                'Offloading %s timed out after waiting %d '
                'seconds.', dir_entry, OFFLOAD_TIMEOUT_SECS)
            error = True
        except OSError as e:
            # Wrong file permissions can cause the call to
            # `shutil.rmtree(dir_entry)` to raise OSError with the message
            # 'Permission denied'. Details can be found in
            # crbug.com/536151
            if e.errno == errno.EACCES:
                logging.warn('Try to correct file permission of %s.',
                             dir_entry)
                correct_results_folder_permission(dir_entry)
        finally:
            signal.alarm(0)
            if error:
                # Rewind the log files for stdout and stderr and log
                # their contents.
                stdout_file.seek(0)
                stderr_file.seek(0)
                stderr_content = stderr_file.read()
                logging.error('Error occurred when offloading %s:', dir_entry)
                logging.error('Stdout:\n%s \nStderr:\n%s', stdout_file.read(),
                              stderr_content)
                # Some result files may have wrong file permissions. Try
                # to correct such errors so a later retry can succeed.
                # TODO(dshi): The code is added to correct result files
                # with wrong file permission caused by bug 511778. After
                # this code is pushed to lab and run for a while to
                # clean up these files, following code and function
                # correct_results_folder_permission can be deleted.
                if 'CommandException: Error opening file' in stderr_content:
                    logging.warn('Try to correct file permission of %s.',
                                 dir_entry)
                    correct_results_folder_permission(dir_entry)
            stdout_file.close()
            stderr_file.close()
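
The signal.alarm(OFFLOAD_TIMEOUT_SECS) call above only works because a SIGALRM handler is installed elsewhere in the module; a minimal sketch of that pattern (the handler and exception names here are assumptions) looks like this.

import signal

class TimeoutException(Exception):
    """Raised when an offload exceeds OFFLOAD_TIMEOUT_SECS."""

def _handle_sigalrm(signum, frame):
    # Turn the alarm into an exception so offload_dir() can clean up.
    raise TimeoutException('Offload timed out.')

signal.signal(signal.SIGALRM, _handle_sigalrm)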
Example No. 17
class SiteDispatcher(object):
    """
    SiteDispatcher subclasses BaseDispatcher in monitor_db.
    """
    DEFAULT_REQUESTED_BY_USER_ID = 1

    _timer = autotest_stats.Timer('scheduler')
    _gauge = autotest_stats.Gauge('scheduler_rel')
    _tick_start = None

    @_timer.decorate
    def tick(self):
        self._tick_start = time.time()
        super(SiteDispatcher, self).tick()
        self._gauge.send('tick', time.time() - self._tick_start)

    @_timer.decorate
    def _garbage_collection(self):
        super(SiteDispatcher, self)._garbage_collection()
        if self._tick_start:
            self._gauge.send('_garbage_collection',
                             time.time() - self._tick_start)

    @_timer.decorate
    def _run_cleanup(self):
        super(SiteDispatcher, self)._run_cleanup()
        if self._tick_start:
            self._gauge.send('_run_cleanup', time.time() - self._tick_start)

    @_timer.decorate
    def _find_aborting(self):
        super(SiteDispatcher, self)._find_aborting()
        if self._tick_start:
            self._gauge.send('_find_aborting', time.time() - self._tick_start)

    @_timer.decorate
    def _process_recurring_runs(self):
        super(SiteDispatcher, self)._process_recurring_runs()
        if self._tick_start:
            self._gauge.send('_process_recurring_runs',
                             time.time() - self._tick_start)

    @_timer.decorate
    def _schedule_delay_tasks(self):
        super(SiteDispatcher, self)._schedule_delay_tasks()
        if self._tick_start:
            self._gauge.send('_schedule_delay_tasks',
                             time.time() - self._tick_start)

    @_timer.decorate
    def _schedule_running_host_queue_entries(self):
        super(SiteDispatcher, self)._schedule_running_host_queue_entries()
        if self._tick_start:
            self._gauge.send('_schedule_running_host_queue_entries',
                             time.time() - self._tick_start)

    @_timer.decorate
    def _schedule_special_tasks(self):
        super(SiteDispatcher, self)._schedule_special_tasks()
        if self._tick_start:
            self._gauge.send('_schedule_special_tasks',
                             time.time() - self._tick_start)

    @_timer.decorate
    def _schedule_new_jobs(self):
        super(SiteDispatcher, self)._schedule_new_jobs()
        if self._tick_start:
            self._gauge.send('_schedule_new_jobs',
                             time.time() - self._tick_start)

    @_timer.decorate
    def _handle_agents(self):
        super(SiteDispatcher, self)._handle_agents()
        if self._tick_start:
            self._gauge.send('_handle_agents', time.time() - self._tick_start)

    def _reverify_hosts_where(self,
                              where,
                              print_message='Reverifying host %s'):
        """
        This is an altered version of _reverify_hosts_where; the call to
        models.SpecialTask.objects.create passes in an argument for
        requested_by, in order to allow the Reset task to be created
        properly.
        """
        full_where = 'locked = 0 AND invalid = 0 AND ' + where
        for host in scheduler_models.Host.fetch(where=full_where):
            if self.host_has_agent(host):
                # host has already been recovered in some way
                continue
            if self._host_has_scheduled_special_task(host):
                # host will have a special task scheduled on the next cycle
                continue
            if print_message:
                logging.error(print_message, host.hostname)
            try:
                user = models.User.objects.get(login='******')
            except models.User.DoesNotExist:
                user = models.User.objects.get(
                    id=SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)
            models.SpecialTask.objects.create(
                task=models.SpecialTask.Task.RESET,
                host=models.Host.objects.get(id=host.id),
                requested_by=user)

    def _check_for_unrecovered_verifying_entries(self):
        # Verify is replaced by Reset.
        queue_entries = scheduler_models.HostQueueEntry.fetch(
            where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
        for queue_entry in queue_entries:
            special_tasks = models.SpecialTask.objects.filter(
                task__in=(models.SpecialTask.Task.CLEANUP,
                          models.SpecialTask.Task.VERIFY,
                          models.SpecialTask.Task.RESET),
                queue_entry__id=queue_entry.id,
                is_complete=False)
            if special_tasks.count() == 0:
                logging.error(
                    'Unrecovered Resetting host queue entry: %s. '
                    'Setting status to Queued.', str(queue_entry))
                # Essentially this host queue entry was set to Verifying,
                # but no special task exists for the entry. This occurs if
                # the scheduler dies between changing the status and creating
                # the special task. By setting it to Queued, the job can
                # restart from the beginning and proceed correctly. This is
                # much preferable to having monitor_db fail to launch.
                queue_entry.set_status('Queued')
def report_stats(board, pool, start_time, end_time, span):
    """Report machine stats for given board, pool and time period.

    @param board: Name of board.
    @param pool: Name of pool.
    @param start_time: start time to collect stats.
    @param end_time: end time to collect stats.
    @param span: Number of hours that the stats should be collected for.
    @return: Error message collected when calculating the stats.
    """
    print '================ %-12s %-12s ================' % (board, pool)
    try:
        history = host_history.get_history_details(start_time=start_time,
                                                   end_time=end_time,
                                                   board=board,
                                                   pool=pool)
    except host_history_utils.NoHostFoundException as e:
        print 'No history found. Error:\n%s' % e
        history = None
        mur = -1
        mar = -1
        mir = -1

    if history:
        status_intervals = host_history_utils.get_status_intervals(history)
        stats_all, num_hosts = host_history_utils.aggregate_hosts(
            status_intervals)
        total = 0
        total_time = span * 3600 * num_hosts
        for status, interval in stats_all.iteritems():
            total += interval
        if abs(total - total_time) > 10:
            error = ('Status intervals do not add up. No stats will be '
                     'collected for board: %s, pool: %s, diff: %s' %
                     (board, pool, total - total_time))
            hosts = []
            for history_for_host in status_intervals:
                total = 0
                for interval in history_for_host.keys():
                    total += interval[1] - interval[0]
                if total > span * 3600:
                    hosts.append(
                        history_for_host.values()[0]['metadata']['hostname'])
            error += ' hosts: %s' % ','.join(hosts)
            print error
            return error

        mur = host_history_utils.get_machine_utilization_rate(stats_all)
        mar = host_history_utils.get_machine_availability_rate(stats_all)
        mir = mar - mur

        for status, interval in stats_all.iteritems():
            print '%-18s %-16s %-10.2f%%' % (status, interval,
                                             100 * interval / total_time)
        print 'Machine utilization rate  = %-4.2f%%' % (100 * mur)
        print 'Machine availability rate = %-4.2f%%' % (100 * mar)

    autotest_stats.Gauge('machine_utilization_rate').send(
        '%s_hours.%s.%s' % (span, board, pool), mur)
    autotest_stats.Gauge('machine_availability_rate').send(
        '%s_hours.%s.%s' % (span, board, pool), mar)
    autotest_stats.Gauge('machine_idle_rate').send(
        '%s_hours.%s.%s' % (span, board, pool), mir)
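
To make the bookkeeping above concrete (all numbers are illustrative): with a 24-hour span and 10 hosts, the status intervals are expected to sum to span * 3600 * num_hosts host-seconds, and the idle rate is simply the gap between availability and utilization.

span, num_hosts = 24, 10                 # hypothetical inputs
total_time = span * 3600 * num_hosts     # 864000 host-seconds to account for
mur, mar = 0.69, 0.95                    # hypothetical utilization/availability rates
mir = mar - mur                          # ~0.26, the machine idle rate sent above
# The gauge keys end up looking like '24_hours.<board>.<pool>'.
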
    @classmethod
    def _decrement_running_processes(cls):
        cls._num_running_processes -= 1
        autotest_stats.Gauge('scheduler').send(
                '%s.num_running_processes' % cls.__name__,
                cls._num_running_processes)