def test_phases(self):
        """Check that nested phases are recorded and report consistent usage.

        ``phase2`` is opened inside ``phase1``; the test sleeps for one second
        before the phases, inside ``phase1`` and inside ``phase2`` so that the
        0.25s poller has time to collect samples for each span.
        """
        monitor = SystemResourceMonitor(poll_interval=0.25)

        monitor.start()
        time.sleep(1)

        with monitor.phase('phase1'):
            time.sleep(1)

            with monitor.phase('phase2'):
                time.sleep(1)

        monitor.stop()

        self.assertEqual(len(monitor.phases), 2)
        # Materialize the keys before comparing: on Python 3 ``keys()`` is a
        # view object, which never compares equal to a list.
        self.assertEqual(['phase2', 'phase1'], list(monitor.phases.keys()))

        # Named ``all_usage`` rather than ``all`` so the builtin is not shadowed.
        all_usage = list(monitor.range_usage())
        data1 = list(monitor.phase_usage('phase1'))
        data2 = list(monitor.phase_usage('phase2'))

        # The whole run covers more samples than phase1, which in turn covers
        # more than the nested phase2.
        self.assertGreater(len(all_usage), len(data1))
        self.assertGreater(len(data1), len(data2))

        # Both phases are closed back to back, so their last samples should
        # end within one poll interval (0.25s) of each other. This could only
        # fail if leaving the two ``with`` blocks took longer than that. It
        # really shouldn't.
        self.assertAlmostEqual(data1[-1].end, data2[-1].end, delta=0.25)
    def test_aggregate_cpu(self):
        """Verify the result types of the aggregate CPU percent/times helpers.

        Both helpers are exercised in their default form (expected to return a
        list) and with ``per_cpu=False`` (expected to return a single value).
        """
        monitor = SystemResourceMonitor(poll_interval=0.25)
        monitor.start()
        time.sleep(1)
        monitor.stop()

        # Default form: one float per CPU reported by multiprocessing.
        per_cpu_percent = monitor.aggregate_cpu_percent()
        self.assertIsInstance(per_cpu_percent, list)
        self.assertEqual(len(per_cpu_percent), multiprocessing.cpu_count())
        for percent in per_cpu_percent:
            self.assertIsInstance(percent, float)

        # Collapsed form: a single float.
        overall_percent = monitor.aggregate_cpu_percent(per_cpu=False)
        self.assertIsInstance(overall_percent, float)

        # CPU times: a non-empty list whose entries expose a ``user`` field.
        per_cpu_times = monitor.aggregate_cpu_times()
        self.assertIsInstance(per_cpu_times, list)
        self.assertGreater(len(per_cpu_times), 0)
        first_entry = per_cpu_times[0]
        self.assertTrue(hasattr(first_entry, 'user'))

        # The collapsed value must have the same type as a per-CPU entry.
        overall_times = monitor.aggregate_cpu_times(per_cpu=False)
        self.assertIsInstance(overall_times, type(first_entry))
    def test_empty(self):
        """A monitor stopped right after starting reports no usage samples."""
        # The poll interval (2s) is much longer than the time between start()
        # and stop(), so no measurement is expected to be taken.
        monitor = SystemResourceMonitor(poll_interval=2.0)
        monitor.start()
        monitor.stop()

        samples = list(monitor.range_usage())
        self.assertEqual(len(samples), 0)
Example #4
0
    def _start_resource_monitoring(self, action, success=None):
        """Activate the virtualenv and try to start the resource monitor.

        ``action`` and ``success`` are accepted but not used by this method.
        Any failure while importing or starting the monitor is reported as a
        warning and never propagates to the caller.
        """
        self.activate_virtualenv()

        # The monitor needs Python 2.7 or newer but is optional for now. Drop
        # this guard once every machine runs a new enough Python (bug 711299).
        running_version = sys.version_info[:2]
        if running_version < (2, 7):
            self.warning('Resource monitoring will not be enabled! Python 2.7+ required.')
            return

        try:
            # Imported here rather than at module level, presumably because
            # the package is only importable once the virtualenv is active.
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = monitor
            monitor.start()
        except Exception:
            failure = traceback.format_exc()
            self.warning("Unable to start resource monitor: %s" % failure)
    def test_memory(self):
        """Check the result types of the memory aggregation helpers.

        ``min_memory_available()`` must return an integer and
        ``max_memory_percent()`` a float.
        """
        # Imported locally so the block does not rely on a module-level import.
        import numbers

        monitor = SystemResourceMonitor(poll_interval=0.25)

        monitor.start()
        time.sleep(1.0)
        monitor.stop()

        v = monitor.min_memory_available()
        # ``long`` only exists on Python 2 and raises NameError on Python 3.
        # numbers.Integral accepts int and long on Python 2 and int on
        # Python 3, so the check works on both.
        self.assertIsInstance(v, numbers.Integral)

        v = monitor.max_memory_percent()
        self.assertIsInstance(v, float)
Example #6
0
    def init(self, warnings_path):
        """Set up resource monitoring and warnings tracking.

        warnings_path is the location of the warnings database file. If the
        file exists it is loaded; if loading raises ValueError the file is
        deleted and the database is left as created.
        """
        self._warnings_path = warnings_path

        # The system monitor is created here but not started.
        monitor = SystemResourceMonitor(poll_interval=1.0)
        self.resources = monitor
        self._resources_started = False

        self.tiers = TierStatus(monitor)

        database = WarningsDatabase()
        self.warnings_database = database
        if os.path.exists(warnings_path):
            try:
                database.load_from_file(warnings_path)
            except ValueError:
                # Presumably a corrupt or incompatible file; discard it.
                os.remove(warnings_path)

        self._warnings_collector = WarningsCollector(database=database,
                                                     objdir=self.topobjdir)
Example #7
0
    def _start_resource_monitoring(self, action, success=None):
        """Start sampling system resources once the virtualenv is activated.

        ``action`` and ``success`` are accepted but not used here. Any error
        raised while importing or starting the monitor is downgraded to a
        warning that carries the formatted traceback.
        """
        self.activate_virtualenv()

        try:
            # Deferred import: presumably the package only becomes importable
            # after the virtualenv has been activated above.
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            started = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = started
            started.start()
        except Exception:
            details = traceback.format_exc()
            self.warning("Unable to start resource monitor: %s" % details)
    def test_aggregate_io(self):
        """The aggregated I/O record exposes a ``read_count`` attribute."""
        # Only the shape of the result is checked. Actual I/O cannot easily be
        # forced: reads and writes may all be served from the page cache.
        monitor = SystemResourceMonitor(poll_interval=0.25)
        monitor.start()
        time.sleep(1.0)
        monitor.stop()

        io_totals = monitor.aggregate_io()
        self.assertTrue(hasattr(io_totals, 'read_count'))
    def test_basic(self):
        """Three seconds of polling at 0.5s yields more than three samples."""
        monitor = SystemResourceMonitor(poll_interval=0.5)
        monitor.start()
        time.sleep(3)
        monitor.stop()

        samples = list(monitor.range_usage())
        self.assertGreater(len(samples), 3)

        # Each sample is expected to be a SystemResourceUsage record.
        first_sample = samples[0]
        self.assertIsInstance(first_sample, SystemResourceUsage)
Example #10
0
    def init(self, warnings_path):
        """Set up resource monitoring and warnings tracking.

        warnings_path is the location of the persistent warnings database. If
        the file exists it is loaded; if loading raises ValueError the file is
        deleted. A second, initially empty database collects only the
        warnings seen by this instance.
        """
        self._warnings_path = warnings_path

        # The system monitor is created here but not started.
        monitor = SystemResourceMonitor(poll_interval=1.0)
        self.resources = monitor
        self._resources_started = False

        self.tiers = TierStatus(monitor)

        persistent = WarningsDatabase()
        self.warnings_database = persistent
        if os.path.exists(warnings_path):
            try:
                persistent.load_from_file(warnings_path)
            except ValueError:
                # Presumably a corrupt or incompatible file; discard it.
                os.remove(warnings_path)

        # Warnings unique to this invocation; old warnings are never loaded
        # into this database.
        self.instance_warnings = WarningsDatabase()

        def on_warning(warning):
            """Record a collected warning in both databases."""
            source_file = warning['filename']

            if os.path.exists(source_file):
                self.warnings_database.insert(warning)
                # Insert a copy so that mutations made through one database
                # do not leak into the other.
                self.instance_warnings.insert(warning.copy())
                return

            raise Exception('Could not find file containing warning: %s' %
                            source_file)

        self._warnings_collector = WarningsCollector(on_warning,
                                                     objdir=self.topobjdir)

        self.build_objects = []
Example #11
0
    def _start_resource_monitoring(self, action, success=None):
        """Bring up the system resource monitor after activating the virtualenv.

        ``action`` and ``success`` are accepted but unused. The method returns
        early with a warning on interpreters older than 2.7, and turns any
        import or start-up error into a warning as well.
        """
        self.activate_virtualenv()

        # Python 2.7+ is required by the monitor, which is optional for now.
        # Delete this check when all machines have been updated (bug 711299).
        new_enough = sys.version_info[:2] >= (2, 7)
        if not new_enough:
            self.warning("Resource monitoring will not be enabled! Python 2.7+ required.")
            return

        try:
            # Imported late, presumably because the package lives in the
            # virtualenv that was only just activated.
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            resource_monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = resource_monitor
            resource_monitor.start()
        except Exception:
            trace = traceback.format_exc()
            self.warning("Unable to start resource monitor: %s" % trace)
Example #12
0
    def init(self, warnings_path):
        """Prepare the monitor's resource tracking and warnings database.

        warnings_path points at the warnings database file. An existing file
        is loaded into the database; a file that fails to load with
        ValueError is removed from disk.
        """
        self._warnings_path = warnings_path

        # Created but not started at this point.
        system_monitor = SystemResourceMonitor(poll_interval=1.0)
        self.resources = system_monitor
        self._resources_started = False

        self.tiers = TierStatus(system_monitor)

        warnings_db = WarningsDatabase()
        self.warnings_database = warnings_db
        if os.path.exists(warnings_path):
            try:
                warnings_db.load_from_file(warnings_path)
            except ValueError:
                # Presumably the file is corrupt or outdated; drop it.
                os.remove(warnings_path)

        collector = WarningsCollector(database=warnings_db,
                                      objdir=self.topobjdir)
        self._warnings_collector = collector
Example #13
0
class ResourceMonitoringMixin(PerfherderResourceOptionsMixin):
    """Provides resource monitoring capabilities to scripts.

    When this class is in the inheritance chain, resource usage stats of the
    executing script will be recorded.

    This class requires the VirtualenvMixin in order to install a package used
    for recording resource usage.

    While we would like to record resource usage for the entirety of a script,
    since we require an external package, we can only record resource usage
    after that package is installed (as part of creating the virtualenv).
    That's just the way things have to be.

    When ``resource_monitor_perfherder_id`` is set, the collected metrics are
    additionally validated and logged as a ``PERFHERDER_DATA`` line.
    """
    def __init__(self, *args, **kwargs):
        """Register the virtualenv packages and initialise monitor state."""
        super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)

        self.register_virtualenv_module('psutil>=3.1.1',
                                        method='pip',
                                        optional=True)
        self.register_virtualenv_module('mozsystemmonitor==0.3',
                                        method='pip',
                                        optional=True)
        self.register_virtualenv_module('jsonschema==2.5.1', method='pip')
        # explicitly install functools32, because some slaves aren't using
        # a version of pip recent enough to install it automatically with
        # jsonschema (which depends on it)
        # https://github.com/Julian/jsonschema/issues/233
        self.register_virtualenv_module('functools32==3.2.3-2', method='pip')
        # Stays None until _start_resource_monitoring succeeds.
        self._resource_monitor = None

        # 2-tuple of (name, options) to assign Perfherder resource monitor
        # metrics to. This needs to be assigned by a script in order for
        # Perfherder metrics to be reported.
        self.resource_monitor_perfherder_id = None

    @PostScriptAction('create-virtualenv')
    def _start_resource_monitoring(self, action, success=None):
        """Start the resource monitor once the virtualenv has been created.

        ``action`` and ``success`` are not used. Failures are logged as
        warnings and leave monitoring disabled.
        """
        self.activate_virtualenv()

        # Resource Monitor requires Python 2.7, however it's currently optional.
        # Remove when all machines have had their Python version updated (bug 711299).
        if sys.version_info[:2] < (2, 7):
            self.warning(
                'Resource monitoring will not be enabled! Python 2.7+ required.'
            )
            return

        try:
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            self._resource_monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor.start()
        except Exception:
            self.warning("Unable to start resource monitor: %s" %
                         traceback.format_exc())

    @PreScriptAction
    def _resource_record_pre_action(self, action):
        """Open a monitor phase named after ``action``."""
        # Resource monitor isn't available until after create-virtualenv.
        if not self._resource_monitor:
            return

        self._resource_monitor.begin_phase(action)

    @PostScriptAction
    def _resource_record_post_action(self, action, success=None):
        """Close the monitor phase named after ``action``."""
        # Resource monitor isn't available until after create-virtualenv.
        if not self._resource_monitor:
            return

        self._resource_monitor.finish_phase(action)

    @PostScriptRun
    def _resource_record_post_run(self):
        """Stop the monitor, log a usage summary and write the raw data.

        The raw measurements are dumped to ``resource-usage.json`` in the
        blob upload directory. Every exception is caught and reported as a
        warning.
        """
        if not self._resource_monitor:
            return

        # This should never raise an exception. This is a workaround until
        # mozsystemmonitor is fixed. See bug 895388.
        try:
            self._resource_monitor.stop()
            self._log_resource_usage()

            # Upload a JSON file containing the raw resource data.
            try:
                upload_dir = self.query_abs_dirs()['abs_blob_upload_dir']
                if not os.path.exists(upload_dir):
                    os.makedirs(upload_dir)
                # Text mode: json.dump() writes str, which a binary-mode file
                # rejects on Python 3.
                with open(os.path.join(upload_dir, 'resource-usage.json'),
                          'w') as fh:
                    json.dump(self._resource_monitor.as_dict(),
                              fh,
                              sort_keys=True,
                              indent=4)
            except (AttributeError, KeyError):
                self.exception('could not upload resource usage JSON',
                               level=WARNING)

        except Exception:
            self.warning("Exception when reporting resource usage: %s" %
                         traceback.format_exc())

    def _log_resource_usage(self):
        """Log total and per-phase usage and emit Perfherder data if enabled.

        Returns without logging when the monitor never recorded a start time.
        An error raised by the Perfherder schema validation is not handled
        here and propagates to the caller.
        """
        # Delay import because not available until virtualenv is populated.
        import jsonschema

        rm = self._resource_monitor

        if rm.start_time is None:
            return

        def resources(phase):
            """Return (cpu_percent, cpu_times, io, (swap_in, swap_out))."""
            cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
            cpu_times = rm.aggregate_cpu_times(phase=phase, per_cpu=False)
            io = rm.aggregate_io(phase=phase)

            # NOTE(review): the swap totals cover every measurement, whatever
            # ``phase`` is; only the overall values are reported below.
            swap_in = sum(m.swap.sin for m in rm.measurements)
            swap_out = sum(m.swap.sout for m in rm.measurements)

            return cpu_percent, cpu_times, io, (swap_in, swap_out)

        def log_usage(prefix, duration, cpu_percent, cpu_times, io):
            """Log one summary line; ``cpu_times`` is accepted but unused."""
            message = '{prefix} - Wall time: {duration:.0f}s; ' \
                'CPU: {cpu_percent}; ' \
                'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ' \
                'Read time: {io_read_time}; Write time: {io_write_time}'

            # XXX Some test harnesses are complaining about a string being
            # being fed into a 'f' formatter. This will help diagnose the
            # issue.
            cpu_percent_str = str(round(
                cpu_percent)) + '%' if cpu_percent else "Can't collect data"

            try:
                self.info(
                    message.format(prefix=prefix,
                                   duration=duration,
                                   cpu_percent=cpu_percent_str,
                                   io_read_bytes=io.read_bytes,
                                   io_write_bytes=io.write_bytes,
                                   io_read_time=io.read_time,
                                   io_write_time=io.write_time))

            except ValueError:
                self.warning("Exception when formatting: %s" %
                             traceback.format_exc())

        cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
        duration = rm.end_time - rm.start_time

        # Write out Perfherder data if configured.
        if self.resource_monitor_perfherder_id:
            perfherder_name, perfherder_options = self.resource_monitor_perfherder_id

            suites = []
            overall = []

            if cpu_percent:
                overall.append({
                    'name': 'cpu_percent',
                    'value': cpu_percent,
                })

            # NOTE(review): 'io.read_bytes' uses a dot where its siblings use
            # an underscore. It is left unchanged because renaming a reported
            # metric would presumably split its history - confirm before
            # normalising.
            overall.extend([
                {
                    'name': 'io_write_bytes',
                    'value': io.write_bytes
                },
                {
                    'name': 'io.read_bytes',
                    'value': io.read_bytes
                },
                {
                    'name': 'io_write_time',
                    'value': io.write_time
                },
                {
                    'name': 'io_read_time',
                    'value': io.read_time
                },
            ])

            suites.append({
                'name':
                '%s.overall' % perfherder_name,
                'extraOptions':
                perfherder_options + self.perfherder_resource_options(),
                'subtests':
                overall,
            })

            for phase in rm.phases.keys():
                phase_duration = rm.phases[phase][1] - rm.phases[phase][0]
                subtests = [{
                    'name': 'time',
                    'value': phase_duration,
                }]
                # Use a phase-specific name: rebinding ``cpu_percent`` here
                # would make the total summary below report the last phase's
                # CPU usage instead of the overall figure.
                phase_cpu_percent = rm.aggregate_cpu_percent(phase=phase,
                                                             per_cpu=False)
                if phase_cpu_percent is not None:
                    subtests.append({
                        'name': 'cpu_percent',
                        'value': phase_cpu_percent,
                    })

                # We don't report I/O during each step because measured I/O
                # is system I/O and that I/O can be delayed (e.g. writes will
                # buffer before being flushed and recorded in our metrics).
                suites.append({
                    'name': '%s.%s' % (perfherder_name, phase),
                    'subtests': subtests,
                })

            data = {
                'framework': {
                    'name': 'job_resource_usage'
                },
                'suites': suites,
            }

            schema_path = os.path.join(external_tools_path,
                                       'performance-artifact-schema.json')
            with open(schema_path, 'rb') as fh:
                schema = json.load(fh)

            # this will throw an exception that causes the job to fail if the
            # perfherder data is not valid -- please don't change this
            # behaviour, otherwise people will inadvertently break this
            # functionality
            # NOTE(review): _resource_record_post_run calls this method inside
            # an ``except Exception`` that only logs a warning - confirm the
            # job still fails as intended on that path.
            self.info('Validating Perfherder data against %s' % schema_path)
            jsonschema.validate(data, schema)
            self.info('PERFHERDER_DATA: %s' % json.dumps(data))

        log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)

        # Print special messages so usage shows up in Treeherder.
        if cpu_percent:
            self._tinderbox_print('CPU usage<br/>{:,.1f}%'.format(cpu_percent))

        self._tinderbox_print('I/O read bytes / time<br/>{:,} / {:,}'.format(
            io.read_bytes, io.read_time))
        self._tinderbox_print('I/O write bytes / time<br/>{:,} / {:,}'.format(
            io.write_bytes, io.write_time))

        # Print CPU components having >1%. "cpu_times" is a data structure
        # whose attributes are measurements. Ideally we'd have an API that
        # returned just the measurements as a dict or something.
        cpu_attrs = []
        for attr in sorted(dir(cpu_times)):
            if attr.startswith('_'):
                continue
            # Skip the ``count``/``index`` methods, which are not measurements.
            if attr in ('count', 'index'):
                continue
            cpu_attrs.append(attr)

        cpu_total = sum(getattr(cpu_times, attr) for attr in cpu_attrs)

        for attr in cpu_attrs:
            value = getattr(cpu_times, attr)
            # cpu_total can be 0.0. Guard against division by 0.
            percent = value / cpu_total * 100.0 if cpu_total else 0.0

            if percent > 1.00:
                self._tinderbox_print('CPU {}<br/>{:,.1f} ({:,.1f}%)'.format(
                    attr, value, percent))

        # Swap on Windows isn't reported by psutil.
        if not self._is_windows():
            self._tinderbox_print('Swap in / out<br/>{:,} / {:,}'.format(
                swap_in, swap_out))

        for phase in rm.phases.keys():
            start_time, end_time = rm.phases[phase]
            # Phase-specific names keep the overall values above intact.
            phase_cpu_percent, phase_cpu_times, phase_io, _ = resources(phase)
            log_usage(phase, end_time - start_time, phase_cpu_percent,
                      phase_cpu_times, phase_io)

    def _tinderbox_print(self, message):
        """Log ``message`` with the ``TinderboxPrint: `` prefix."""
        self.info('TinderboxPrint: %s' % message)
Example #14
0
class ResourceMonitoringMixin(object):
    """Mixin that records resource usage while a script runs.

    Putting this class in a script's inheritance chain makes the script
    record and log resource usage statistics for its own execution.

    The VirtualenvMixin is required as well, because the package that does
    the recording has to be installed into the virtualenv.

    Recording can therefore only begin after the virtualenv has been created
    and the package installed, not from the very start of the script.
    """
    def __init__(self, *args, **kwargs):
        """Register the optional monitoring packages and reset the monitor."""
        super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)

        # Both packages are registered as optional pip installs.
        for requirement in ('psutil>=0.7.1', 'mozsystemmonitor==0.0.0'):
            self.register_virtualenv_module(requirement,
                                            method='pip',
                                            optional=True)
        # Stays None until _start_resource_monitoring succeeds.
        self._resource_monitor = None

    @PostScriptAction('create-virtualenv')
    def _start_resource_monitoring(self, action, success=None):
        """Start the resource monitor once the virtualenv has been created.

        ``action`` and ``success`` are unused. Failures are only logged.
        """
        self.activate_virtualenv()

        # The monitor needs Python 2.7 or newer but is optional for now. Drop
        # this guard once every machine runs a new enough Python (bug 711299).
        running_version = sys.version_info[:2]
        if running_version < (2, 7):
            self.warning(
                'Resource monitoring will not be enabled! Python 2.7+ required.'
            )
            return

        try:
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = monitor
            monitor.start()
        except Exception:
            failure = traceback.format_exc()
            self.warning("Unable to start resource monitor: %s" % failure)

    @PreScriptAction
    def _resource_record_pre_action(self, action):
        """Open a monitor phase named after ``action``."""
        # No monitor exists until create-virtualenv has run.
        if self._resource_monitor:
            self._resource_monitor.begin_phase(action)

    @PostScriptAction
    def _resource_record_post_action(self, action, success=None):
        """Close the monitor phase named after ``action``."""
        # No monitor exists until create-virtualenv has run.
        if self._resource_monitor:
            self._resource_monitor.finish_phase(action)

    @PostScriptRun
    def _resource_record_post_run(self):
        """Stop the monitor and log the collected usage."""
        if not self._resource_monitor:
            return

        # Nothing in here should raise; the blanket handler is a workaround
        # until mozsystemmonitor is fixed (bug 895388).
        try:
            self._resource_monitor.stop()
            self._log_resource_usage()
        except Exception:
            failure = traceback.format_exc()
            self.warning("Exception when reporting resource usage: %s" %
                         failure)

    def _log_resource_usage(self):
        """Log one summary line for the whole run and one per phase."""
        monitor = self._resource_monitor

        if monitor.start_time is None:
            return

        template = ('{prefix} - Wall time: {duration:.0f}s; '
                    'CPU: {cpu_percent}; '
                    'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; '
                    'Read time: {io_read_time}; Write time: {io_write_time}')

        def gather(phase):
            """Return (cpu_percent, cpu_times, io) for ``phase``."""
            return (monitor.aggregate_cpu_percent(phase=phase, per_cpu=False),
                    monitor.aggregate_cpu_times(phase=phase, per_cpu=False),
                    monitor.aggregate_io(phase=phase))

        def report(label, elapsed, cpu_percent, io):
            """Format and log a single usage line."""
            # The CPU figure is pre-rendered as a string because some test
            # harnesses complained about a string reaching an 'f' formatter;
            # this helps diagnose that issue.
            if cpu_percent:
                cpu_text = str(round(cpu_percent)) + '%'
            else:
                cpu_text = "Can't collect data"

            try:
                self.info(
                    template.format(prefix=label,
                                    duration=elapsed,
                                    cpu_percent=cpu_text,
                                    io_read_bytes=io.read_bytes,
                                    io_write_bytes=io.write_bytes,
                                    io_read_time=io.read_time,
                                    io_write_time=io.write_time))
            except ValueError:
                self.warning("Exception when formatting: %s" %
                             traceback.format_exc())

        # The CPU times are collected but not part of the logged line.
        total_cpu, _, total_io = gather(None)
        report('Total resource usage', monitor.end_time - monitor.start_time,
               total_cpu, total_io)

        for phase in monitor.phases.keys():
            began, finished = monitor.phases[phase]
            phase_cpu, _, phase_io = gather(phase)
            report(phase, finished - began, phase_cpu, phase_io)
Example #15
0
class ResourceMonitoringMixin(object):
    """Adds resource usage recording to a script.

    With this mixin in the inheritance chain, the executing script records
    its resource usage statistics and logs them when the run finishes.

    It depends on the VirtualenvMixin, which installs the external package
    that performs the recording.

    Because that package is installed while the virtualenv is created, usage
    can only be recorded from that point on rather than for the whole script.
    """
    def __init__(self, *args, **kwargs):
        """Register the optional packages and clear the monitor reference."""
        super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)

        pip_optional = dict(method='pip', optional=True)
        self.register_virtualenv_module('psutil>=0.7.1', **pip_optional)
        self.register_virtualenv_module('mozsystemmonitor==0.0.0',
                                        **pip_optional)
        # Set by _start_resource_monitoring when the monitor is created.
        self._resource_monitor = None

    @PostScriptAction('create-virtualenv')
    def _start_resource_monitoring(self, action, success=None):
        """Create and start the monitor after the virtualenv is available.

        ``action`` and ``success`` are unused. Errors are logged as warnings.
        """
        self.activate_virtualenv()

        # Python 2.7+ is required by the monitor, which is optional for now.
        # Delete this check when all machines have been updated (bug 711299).
        new_enough = sys.version_info[:2] >= (2, 7)
        if not new_enough:
            self.warning('Resource monitoring will not be enabled! Python 2.7+ required.')
            return

        try:
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            resource_monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = resource_monitor
            resource_monitor.start()
        except Exception:
            trace = traceback.format_exc()
            self.warning("Unable to start resource monitor: %s" % trace)

    @PreScriptAction
    def _resource_record_pre_action(self, action):
        """Begin a monitor phase for ``action`` if monitoring is running."""
        monitor = self._resource_monitor
        # The monitor only exists after create-virtualenv.
        if not monitor:
            return

        monitor.begin_phase(action)

    @PostScriptAction
    def _resource_record_post_action(self, action, success=None):
        """Finish the monitor phase for ``action`` if monitoring is running."""
        monitor = self._resource_monitor
        # The monitor only exists after create-virtualenv.
        if not monitor:
            return

        monitor.finish_phase(action)

    @PostScriptRun
    def _resource_record_post_run(self):
        """Stop monitoring and log the usage summary."""
        if not self._resource_monitor:
            return

        # Reporting is not supposed to raise. Catching everything is a
        # workaround until mozsystemmonitor is fixed; see bug 895388.
        try:
            self._resource_monitor.stop()
            self._log_resource_usage()
        except Exception:
            trace = traceback.format_exc()
            self.warning("Exception when reporting resource usage: %s" % trace)

    def _log_resource_usage(self):
        """Log usage for the whole run, then for each recorded phase."""
        rm = self._resource_monitor
        if rm.start_time is None:
            return

        def resources(phase):
            """Collect the CPU percent, CPU times and I/O for ``phase``."""
            percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
            times = rm.aggregate_cpu_times(phase=phase, per_cpu=False)
            io_counters = rm.aggregate_io(phase=phase)
            return percent, times, io_counters

        def log_usage(prefix, duration, cpu_percent, cpu_times, io):
            """Emit one usage line; ``cpu_times`` is accepted but unused."""
            message = ''.join([
                '{prefix} - Wall time: {duration:.0f}s; ',
                'CPU: {cpu_percent}; ',
                'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ',
                'Read time: {io_read_time}; Write time: {io_write_time}',
            ])

            # Some test harnesses complained about a string being fed into
            # an 'f' formatter, so the CPU value is rendered to text up
            # front to help diagnose the issue.
            cpu_percent_str = "Can't collect data"
            if cpu_percent:
                cpu_percent_str = str(round(cpu_percent)) + '%'

            try:
                fields = dict(
                    prefix=prefix,
                    duration=duration,
                    cpu_percent=cpu_percent_str,
                    io_read_bytes=io.read_bytes,
                    io_write_bytes=io.write_bytes,
                    io_read_time=io.read_time,
                    io_write_time=io.write_time,
                )
                self.info(message.format(**fields))
            except ValueError:
                self.warning("Exception when formatting: %s" %
                             traceback.format_exc())

        overall = resources(None)
        duration = rm.end_time - rm.start_time
        log_usage('Total resource usage', duration, *overall)

        for phase in rm.phases.keys():
            start_time, end_time = rm.phases[phase]
            phase_usage = resources(phase)
            log_usage(phase, end_time - start_time, *phase_usage)
Example #16
0
class ResourceMonitoringMixin(object):
    """Provides resource monitoring capabilities to scripts.

    When this class is in the inheritance chain, resource usage stats of the
    executing script will be recorded.

    This class requires the VirtualenvMixin in order to install a package used
    for recording resource usage.

    While we would like to record resource usage for the entirety of a script,
    since we require an external package, we can only record resource usage
    after that package is installed (as part of creating the virtualenv).
    That's just the way things have to be.
    """
    def __init__(self, *args, **kwargs):
        """Register the monitoring packages with the virtualenv.

        The monitor itself is only created after the ``create-virtualenv``
        action; until then ``self._resource_monitor`` is ``None``.
        """
        super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)

        self.register_virtualenv_module('psutil>=3.1.1',
                                        method='pip',
                                        optional=True)
        self.register_virtualenv_module('mozsystemmonitor==0.3',
                                        method='pip',
                                        optional=True)
        self._resource_monitor = None

    @PostScriptAction('create-virtualenv')
    def _start_resource_monitoring(self, action, success=None):
        """Create and start the resource monitor once the virtualenv exists.

        Any failure (including the monitoring package being unavailable) is
        logged as a warning and leaves ``self._resource_monitor`` unset.
        """
        self.activate_virtualenv()

        # Resource Monitor requires Python 2.7, however it's currently optional.
        # Remove when all machines have had their Python version updated (bug 711299).
        if sys.version_info[:2] < (2, 7):
            self.warning(
                'Resource monitoring will not be enabled! Python 2.7+ required.'
            )
            return

        try:
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            self._resource_monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor.start()
        except Exception:
            self.warning("Unable to start resource monitor: %s" %
                         traceback.format_exc())

    @PreScriptAction
    def _resource_record_pre_action(self, action):
        """Open a monitor phase named after the action about to run."""
        # Resource monitor isn't available until after create-virtualenv.
        if not self._resource_monitor:
            return

        self._resource_monitor.begin_phase(action)

    @PostScriptAction
    def _resource_record_post_action(self, action, success=None):
        """Close the monitor phase named after the action that just ran."""
        # Resource monitor isn't available until after create-virtualenv.
        if not self._resource_monitor:
            return

        self._resource_monitor.finish_phase(action)

    @PostScriptRun
    def _resource_record_post_run(self):
        """Stop the monitor, log a usage summary and dump the raw data.

        The raw data is written as ``resource-usage.json`` into the
        ``abs_blob_upload_dir`` directory. Every failure is downgraded to a
        warning so that reporting can never fail the script.
        """
        if not self._resource_monitor:
            return

        # This should never raise an exception. This is a workaround until
        # mozsystemmonitor is fixed. See bug 895388.
        try:
            self._resource_monitor.stop()
            self._log_resource_usage()

            # Upload a JSON file containing the raw resource data.
            try:
                upload_dir = self.query_abs_dirs()['abs_blob_upload_dir']
                # json.dump() emits text, so the file must be opened in text
                # mode; binary mode raises TypeError on Python 3.
                with open(os.path.join(upload_dir, 'resource-usage.json'),
                          'w') as fh:
                    json.dump(self._resource_monitor.as_dict(),
                              fh,
                              sort_keys=True,
                              indent=4)
            except (AttributeError, KeyError):
                self.exception('could not upload resource usage JSON',
                               level=WARNING)

        except Exception:
            self.warning("Exception when reporting resource usage: %s" %
                         traceback.format_exc())

    def _log_resource_usage(self):
        """Log total and per-phase resource usage, plus Treeherder summaries.

        Emits one info line for the whole run and one per monitor phase, and
        ``TinderboxPrint`` lines for CPU, I/O, CPU components above 1% and
        (except on Windows) swap activity. Does nothing when the monitor has
        no start time.
        """
        rm = self._resource_monitor

        if rm.start_time is None:
            return

        def resources(phase):
            cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
            cpu_times = rm.aggregate_cpu_times(phase=phase, per_cpu=False)
            io = rm.aggregate_io(phase=phase)

            return cpu_percent, cpu_times, io

        def log_usage(prefix, duration, cpu_percent, cpu_times, io):
            message = '{prefix} - Wall time: {duration:.0f}s; ' \
                'CPU: {cpu_percent}; ' \
                'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ' \
                'Read time: {io_read_time}; Write time: {io_write_time}'

            # XXX Some test harnesses are complaining about a string being
            # being fed into a 'f' formatter. This will help diagnose the
            # issue.
            cpu_percent_str = str(round(
                cpu_percent)) + '%' if cpu_percent else "Can't collect data"

            try:
                self.info(
                    message.format(prefix=prefix,
                                   duration=duration,
                                   cpu_percent=cpu_percent_str,
                                   io_read_bytes=io.read_bytes,
                                   io_write_bytes=io.write_bytes,
                                   io_read_time=io.read_time,
                                   io_write_time=io.write_time))

            except ValueError:
                self.warning("Exception when formatting: %s" %
                             traceback.format_exc())

        cpu_percent, cpu_times, io = resources(None)

        # Swap totals are summed over every measurement and do not depend on
        # the phase, so compute them once rather than once per phase.
        swap_in = sum(m.swap.sin for m in rm.measurements)
        swap_out = sum(m.swap.sout for m in rm.measurements)

        duration = rm.end_time - rm.start_time

        log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)

        # Print special messages so usage shows up in Treeherder.
        if cpu_percent:
            self._tinderbox_print('CPU usage<br/>{:,.1f}%'.format(cpu_percent))

        self._tinderbox_print('I/O read bytes / time<br/>{:,} / {:,}'.format(
            io.read_bytes, io.read_time))
        self._tinderbox_print('I/O write bytes / time<br/>{:,} / {:,}'.format(
            io.write_bytes, io.write_time))

        # Print CPU components having >1%. "cpu_times" is a data structure
        # whose attributes are measurements. Ideally we'd have an API that
        # returned just the measurements as a dict or something.
        cpu_attrs = [
            attr for attr in sorted(dir(cpu_times))
            if not attr.startswith('_') and attr not in ('count', 'index')
        ]

        cpu_total = sum(getattr(cpu_times, attr) for attr in cpu_attrs)

        for attr in cpu_attrs:
            value = getattr(cpu_times, attr)
            # cpu_total can be 0; guard against division by zero so the rest
            # of the report (swap, phases, JSON upload) is still produced.
            percent = value / cpu_total * 100.0 if cpu_total else 0.0
            if percent > 1.00:
                self._tinderbox_print('CPU {}<br/>{:,.1f} ({:,.1f}%)'.format(
                    attr, value, percent))

        # Swap on Windows isn't reported by psutil.
        if not self._is_windows():
            self._tinderbox_print('Swap in / out<br/>{:,} / {:,}'.format(
                swap_in, swap_out))

        for phase in rm.phases.keys():
            start_time, end_time = rm.phases[phase]
            cpu_percent, cpu_times, io = resources(phase)
            log_usage(phase, end_time - start_time, cpu_percent, cpu_times, io)

    def _tinderbox_print(self, message):
        """Log ``message`` at info level with the ``TinderboxPrint: `` prefix."""
        self.info('TinderboxPrint: %s' % message)
    def test_events(self):
        """Events recorded while monitoring keep their name and timestamp."""
        monitor = SystemResourceMonitor(poll_interval=0.25)

        monitor.start()
        time.sleep(0.5)

        t0 = time.time()
        monitor.record_event('t0')
        time.sleep(0.5)

        t1 = time.time()
        monitor.record_event('t1')
        time.sleep(0.5)
        monitor.stop()

        events = monitor.events
        self.assertEqual(len(events), 2)

        # Index 0 of an event is compared to the wall-clock time taken just
        # before recording, index 1 to the event name. Check both events so
        # that t1 is actually verified instead of being captured and dropped.
        expected = [(t0, 't0'), (t1, 't1')]
        for event, (expected_time, expected_name) in zip(events, expected):
            self.assertEqual(event[1], expected_name)
            self.assertAlmostEqual(event[0], expected_time, delta=0.25)

        data = list(monitor.between_events_usage('t0', 't1'))
        self.assertGreater(len(data), 0)
    def test_no_data(self):
        """A monitor that was never started yields no usage samples."""
        unstarted = SystemResourceMonitor()
        samples = list(unstarted.range_usage())

        self.assertEqual(len(samples), 0)
Example #19
0
class BuildMonitor(MozbuildObject):
    """Monitors the output of the build."""

    def init(self, warnings_path):
        """Create a new monitor.

        warnings_path is a path of a warnings database to use.
        """
        self._warnings_path = warnings_path
        self.resources = SystemResourceMonitor(poll_interval=1.0)
        self._resources_started = False

        self.tiers = TierStatus(self.resources)

        self.warnings_database = WarningsDatabase()
        if os.path.exists(warnings_path):
            try:
                self.warnings_database.load_from_file(warnings_path)
            except ValueError:
                # An unreadable database is discarded; a fresh one is saved
                # by finish().
                os.remove(warnings_path)

        self._warnings_collector = WarningsCollector(database=self.warnings_database, objdir=self.topobjdir)

        self.build_objects = []

    def start(self):
        """Record the start of the build."""
        self.start_time = time.time()
        self._finder_start_cpu = self._get_finder_cpu_usage()

    def start_resource_recording(self):
        """Start the system resource monitor."""
        # This should be merged into start() once bug 892342 lands.
        self.resources.start()
        self._resources_started = True

    def on_line(self, line):
        """Consume a line of output from the build system.

        This will parse the line for state and determine whether more action is
        needed.

        Returns a BuildOutputResult instance.

        In this named tuple, warning will be an object describing a new parsed
        warning. Otherwise it will be None.

        state_changed indicates whether the build system changed state with
        this line. If the build system changed state, the caller may want to
        query this instance for the current state in order to update UI, etc.

        for_display is a boolean indicating whether the line is relevant to the
        user. This is typically used to filter whether the line should be
        presented to the user.

        Raises Exception for a BUILDSTATUS line with an unknown action.
        """
        if line.startswith("BUILDSTATUS"):
            args = line.split()[1:]

            action = args.pop(0)
            update_needed = True

            if action == "TIERS":
                self.tiers.set_tiers(args)
                update_needed = False
            elif action == "TIER_START":
                tier = args[0]
                self.tiers.begin_tier(tier)
            elif action == "TIER_FINISH":
                tier, = args
                self.tiers.finish_tier(tier)
            elif action == "OBJECT_FILE":
                self.build_objects.append(args[0])
                update_needed = False
            else:
                raise Exception("Unknown build status: %s" % action)

            return BuildOutputResult(None, update_needed, False)

        warning = None

        try:
            warning = self._warnings_collector.process_line(line)
        except Exception:
            # Warning parsing is best-effort and must not break the build.
            # Only Exception is swallowed so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        return BuildOutputResult(warning, False, True)

    def stop_resource_recording(self):
        """Stop the system resource monitor if it was started."""
        if self._resources_started:
            self.resources.stop()

        self._resources_started = False

    def finish(self, record_usage=True):
        """Record the end of the build.

        Stops resource recording, saves the warnings database and, when
        record_usage is true and usage data exists, logs a summary and writes
        the raw data to the ``build_resources.json`` state file. Errors while
        reporting usage are logged as warnings rather than raised.
        """
        self.stop_resource_recording()
        self.end_time = time.time()
        self._finder_end_cpu = self._get_finder_cpu_usage()
        self.elapsed = self.end_time - self.start_time

        self.warnings_database.prune()
        self.warnings_database.save_to_file(self._warnings_path)

        if not record_usage:
            return

        try:
            usage = self.get_resource_usage()
            if not usage:
                return

            self.log_resource_usage(usage)
            with open(self._get_state_filename("build_resources.json"), "w") as fh:
                json.dump(self.resources.as_dict(), fh, indent=2)
        except Exception as e:
            self.log(
                logging.WARNING,
                "build_resources_error",
                {"msg": str(e)},
                "Exception when writing resource usage file: {msg}",
            )

    @staticmethod
    def _proc_value(proc, attr):
        """Return a psutil process attribute, calling it if it is a method.

        psutil 2.0 turned ``Process.name``, ``username`` and ``exe`` from
        properties into methods; accepting both keeps the Finder lookup
        working with either API generation.
        """
        value = getattr(proc, attr)
        return value() if callable(value) else value

    def _get_finder_cpu_usage(self):
        """Obtain the CPU usage of the Finder app on OS X.

        This is used to detect high CPU usage.

        Returns the CPU times of the current user's system Finder process, or
        None when not on OS X, when psutil is unavailable, or when no such
        process is found.
        """
        if not sys.platform.startswith("darwin"):
            return None

        if not psutil:
            return None

        for proc in psutil.process_iter():
            try:
                if self._proc_value(proc, "name") != "Finder":
                    continue

                if self._proc_value(proc, "username") != getpass.getuser():
                    continue

                # Try to isolate system finder as opposed to other "Finder"
                # processes.
                if not self._proc_value(proc, "exe").endswith("CoreServices/Finder.app/Contents/MacOS/Finder"):
                    continue

                # cpu_times() replaced get_cpu_times() in psutil 2.0.
                cpu_times = getattr(proc, "cpu_times", None) or proc.get_cpu_times
                return cpu_times()
            except psutil.Error:
                # The process exited or cannot be inspected; keep looking
                # rather than failing the build over an optional diagnostic.
                continue

        return None

    def have_high_finder_usage(self):
        """Determine whether there was high Finder CPU usage during the build.

        Returns a tuple of (high, finder_percent). ``high`` is True if Finder
        used more than 25% of one core, False if it did not. Both values are
        None if there is nothing to report (no Finder measurements, or a
        build shorter than 15 seconds).
        """
        if not self._finder_start_cpu:
            return None, None

        # We only measure if the measured range is sufficiently long.
        if self.elapsed < 15:
            return None, None

        if not self._finder_end_cpu:
            return None, None

        start = self._finder_start_cpu
        end = self._finder_end_cpu

        start_total = start.user + start.system
        end_total = end.user + end.system

        cpu_seconds = end_total - start_total

        # If Finder used more than 25% of 1 core during the build, report an
        # error.
        finder_percent = cpu_seconds / self.elapsed * 100

        return finder_percent > 25, finder_percent

    def have_excessive_swapping(self):
        """Determine whether there was excessive swapping during the build.

        Returns a tuple of (excessive, swap_in, swap_out). All values are None
        if no swap information is available.
        """
        if not self.have_resource_usage:
            return None, None, None

        swap_in = sum(m.swap.sin for m in self.resources.measurements)
        swap_out = sum(m.swap.sout for m in self.resources.measurements)

        # The threshold of 512 MB has been arbitrarily chosen.
        #
        # Choosing a proper value that is ideal for everyone is hard. We will
        # likely iterate on the logic until people are generally satisfied.
        # If a value is too low, the eventual warning produced does not carry
        # much meaning. If the threshold is too high, people may not see the
        # warning and the warning will thus be ineffective.
        excessive = swap_in > 512 * 1048576 or swap_out > 512 * 1048576
        return excessive, swap_in, swap_out

    @property
    def have_resource_usage(self):
        """Whether resource usage is available."""
        return self.resources.start_time is not None

    def get_resource_usage(self):
        """Produce a data structure containing the low-level resource usage information.

        This data structure can e.g. be serialized into JSON and saved for
        subsequent analysis.

        If no resource usage is available, None is returned.
        """
        if not self.have_resource_usage:
            return None

        cpu_percent = self.resources.aggregate_cpu_percent(phase=None, per_cpu=False)
        cpu_times = self.resources.aggregate_cpu_times(phase=None, per_cpu=False)
        io = self.resources.aggregate_io(phase=None)

        o = dict(
            version=3,
            argv=sys.argv,
            start=self.start_time,
            end=self.end_time,
            duration=self.end_time - self.start_time,
            resources=[],
            cpu_percent=cpu_percent,
            cpu_times=cpu_times,
            io=io,
            objects=self.build_objects,
        )

        o["tiers"] = self.tiers.tiered_resource_usage()

        self.tiers.add_resource_fields_to_dict(o)

        # One entry per recorded sample. The per-sample CPU figures are added
        # by add_resources_to_dict(); nothing here needs separate aggregates.
        for usage in self.resources.range_usage():
            entry = dict(start=usage.start, end=usage.end, virt=list(usage.virt), swap=list(usage.swap))

            self.tiers.add_resources_to_dict(entry, start=usage.start, end=usage.end)

            o["resources"].append(entry)

        # If the imports for this file ran before the in-tree virtualenv
        # was bootstrapped (for instance, for a clobber build in automation),
        # psutil might not be available.
        #
        # Treat psutil as optional to avoid an outright failure to log resources
        # TODO: it would be nice to collect data on the storage device as well
        # in this case.
        o["system"] = {}
        if psutil:
            o["system"].update(
                dict(
                    logical_cpu_count=psutil.cpu_count(),
                    physical_cpu_count=psutil.cpu_count(logical=False),
                    swap_total=psutil.swap_memory()[0],
                    vmem_total=psutil.virtual_memory()[0],
                )
            )

        return o

    def log_resource_usage(self, usage):
        """Summarize the resource usage of this build in a log message.

        usage is the structure returned by get_resource_usage(); nothing is
        logged when it is empty. A second message reports swap activity in MB
        when any swapping was recorded.
        """

        if not usage:
            return

        params = dict(
            duration=self.end_time - self.start_time,
            cpu_percent=usage["cpu_percent"],
            io_read_bytes=usage["io"].read_bytes,
            io_write_bytes=usage["io"].write_bytes,
            io_read_time=usage["io"].read_time,
            io_write_time=usage["io"].write_time,
        )

        message = (
            "Overall system resources - Wall time: {duration:.0f}s; "
            "CPU: {cpu_percent:.0f}%; "
            "Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; "
            "Read time: {io_read_time}; Write time: {io_write_time}"
        )

        self.log(logging.WARNING, "resource_usage", params, message)

        excessive, sin, sout = self.have_excessive_swapping()
        if excessive is not None and (sin or sout):
            # Convert bytes to MB for display.
            sin /= 1048576
            sout /= 1048576
            self.log(logging.WARNING, "swap_activity", {"sin": sin, "sout": sout}, "Swap in/out (MB): {sin}/{sout}")

    def ccache_stats(self):
        """Return parsed ``ccache -s`` statistics.

        Returns None when ccache is not found on the path or when its output
        cannot be parsed (the parse error is logged as a warning).
        """
        ccache_stats = None

        try:
            ccache = which.which("ccache")
            output = subprocess.check_output([ccache, "-s"])
            ccache_stats = CCacheStats(output)
        except which.WhichError:
            pass
        except ValueError as e:
            self.log(logging.WARNING, "ccache", {"msg": str(e)}, "{msg}")

        return ccache_stats
Example #20
0
class BuildMonitor(MozbuildObject):
    """Monitors the output of the build."""
    def init(self, warnings_path):
        """Create a new monitor.

        warnings_path is a path of a warnings database to use.
        """
        self._warnings_path = warnings_path
        self.resources = SystemResourceMonitor(poll_interval=1.0)
        self._resources_started = False

        self.tiers = TierStatus(self.resources)

        self.warnings_database = WarningsDatabase()
        if os.path.exists(warnings_path):
            try:
                self.warnings_database.load_from_file(warnings_path)
            except ValueError:
                # An unreadable database is discarded; a fresh one is saved
                # by finish().
                os.remove(warnings_path)

        self._warnings_collector = WarningsCollector(
            database=self.warnings_database, objdir=self.topobjdir)

    def start(self):
        """Record the start of the build."""
        self.start_time = time.time()
        self._finder_start_cpu = self._get_finder_cpu_usage()

    def start_resource_recording(self):
        """Start the system resource monitor."""
        # This should be merged into start() once bug 892342 lands.
        self.resources.start()
        self._resources_started = True

    def on_line(self, line):
        """Consume a line of output from the build system.

        This will parse the line for state and determine whether more action is
        needed.

        Returns a BuildOutputResult instance.

        In this named tuple, warning will be an object describing a new parsed
        warning. Otherwise it will be None.

        state_changed indicates whether the build system changed state with
        this line. If the build system changed state, the caller may want to
        query this instance for the current state in order to update UI, etc.

        for_display is a boolean indicating whether the line is relevant to the
        user. This is typically used to filter whether the line should be
        presented to the user.

        Raises Exception for a BUILDSTATUS line with an unknown action.
        """
        if line.startswith('BUILDSTATUS'):
            args = line.split()[1:]

            action = args.pop(0)
            update_needed = True

            if action == 'TIERS':
                self.tiers.set_tiers(args)
                update_needed = False
            elif action == 'TIER_START':
                tier = args[0]
                subtiers = args[1:]
                self.tiers.begin_tier(tier, subtiers)
            elif action == 'TIER_FINISH':
                tier, = args
                self.tiers.finish_tier(tier)
            elif action == 'SUBTIER_START':
                tier, subtier = args[0:2]
                dirs = args[2:]
                self.tiers.begin_subtier(tier, subtier, dirs)
            elif action == 'SUBTIER_FINISH':
                tier, subtier = args
                self.tiers.finish_subtier(tier, subtier)
            elif action == 'TIERDIR_START':
                tier, subtier, d = args
                self.tiers.begin_dir(tier, subtier, d)
            elif action == 'TIERDIR_FINISH':
                tier, subtier, d = args
                self.tiers.finish_dir(tier, subtier, d)
            else:
                raise Exception('Unknown build status: %s' % action)

            return BuildOutputResult(None, update_needed, False)

        warning = None

        try:
            warning = self._warnings_collector.process_line(line)
        except Exception:
            # Warning parsing is best-effort and must not break the build.
            # Only Exception is swallowed so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        return BuildOutputResult(warning, False, True)

    def finish(self, record_usage=True):
        """Record the end of the build.

        Stops resource recording if it was started, saves the warnings
        database and, when record_usage is true and usage data exists, writes
        it to the ``build_resources.json`` state file.
        """
        self.end_time = time.time()

        if self._resources_started:
            self.resources.stop()

        self._finder_end_cpu = self._get_finder_cpu_usage()
        self.elapsed = self.end_time - self.start_time

        self.warnings_database.prune()
        self.warnings_database.save_to_file(self._warnings_path)

        if not record_usage:
            return

        usage = self.record_resource_usage()
        if not usage:
            return

        with open(self._get_state_filename('build_resources.json'), 'w') as fh:
            json.dump(usage, fh, indent=2)

    @staticmethod
    def _proc_value(proc, attr):
        """Return a psutil process attribute, calling it if it is a method.

        psutil 2.0 turned ``Process.name``, ``username`` and ``exe`` from
        properties into methods; accepting both keeps the Finder lookup
        working with either API generation.
        """
        value = getattr(proc, attr)
        return value() if callable(value) else value

    def _get_finder_cpu_usage(self):
        """Obtain the CPU usage of the Finder app on OS X.

        This is used to detect high CPU usage.

        Returns the CPU times of the current user's system Finder process, or
        None when not on OS X, when psutil is unavailable, or when no such
        process is found.
        """
        if not sys.platform.startswith('darwin'):
            return None

        if not psutil:
            return None

        for proc in psutil.process_iter():
            try:
                if self._proc_value(proc, 'name') != 'Finder':
                    continue

                if self._proc_value(proc, 'username') != getpass.getuser():
                    continue

                # Try to isolate system finder as opposed to other "Finder"
                # processes.
                if not self._proc_value(proc, 'exe').endswith(
                        'CoreServices/Finder.app/Contents/MacOS/Finder'):
                    continue

                # cpu_times() replaced get_cpu_times() in psutil 2.0.
                cpu_times = getattr(proc, 'cpu_times',
                                    None) or proc.get_cpu_times
                return cpu_times()
            except psutil.Error:
                # The process exited or cannot be inspected; keep looking
                # rather than failing the build over an optional diagnostic.
                continue

        return None

    def have_high_finder_usage(self):
        """Determine whether there was high Finder CPU usage during the build.

        Returns a tuple of (high, finder_percent). ``high`` is True if Finder
        used more than 25% of one core, False if it did not. Both values are
        None if there is nothing to report (no Finder measurements, or a
        build shorter than 15 seconds).
        """
        if not self._finder_start_cpu:
            return None, None

        # We only measure if the measured range is sufficiently long.
        if self.elapsed < 15:
            return None, None

        if not self._finder_end_cpu:
            return None, None

        start = self._finder_start_cpu
        end = self._finder_end_cpu

        start_total = start.user + start.system
        end_total = end.user + end.system

        cpu_seconds = end_total - start_total

        # If Finder used more than 25% of 1 core during the build, report an
        # error.
        finder_percent = cpu_seconds / self.elapsed * 100

        return finder_percent > 25, finder_percent

    @property
    def have_resource_usage(self):
        """Whether resource usage is available."""
        return self.resources.start_time is not None

    def record_resource_usage(self):
        """Record the resource usage of this build.

        We write a log message containing a high-level summary. We also produce
        a data structure containing the low-level resource usage information.
        This data structure can e.g. be serialized into JSON and saved for
        subsequent analysis.

        If no resource usage is available, None is returned.
        """
        if not self.have_resource_usage:
            return None

        cpu_percent = self.resources.aggregate_cpu_percent(phase=None,
                                                           per_cpu=False)
        cpu_times = self.resources.aggregate_cpu_times(phase=None,
                                                       per_cpu=False)
        io = self.resources.aggregate_io(phase=None)

        self._log_resource_usage('Overall system resources', 'resource_usage',
                                 self.end_time - self.start_time, cpu_percent,
                                 cpu_times, io)

        o = dict(
            version=1,
            start=self.start_time,
            end=self.end_time,
            duration=self.end_time - self.start_time,
            resources=[],
            cpu_percent=cpu_percent,
            cpu_times=cpu_times,
            io=io,
        )

        o['tiers'] = self.tiers.tiered_resource_usage()

        self.tiers.add_resource_fields_to_dict(o)

        # One entry per recorded sample. The per-sample CPU figures are added
        # by add_resources_to_dict(); nothing here needs separate aggregates.
        for usage in self.resources.range_usage():
            entry = dict(
                start=usage.start,
                end=usage.end,
                virt=list(usage.virt),
                swap=list(usage.swap),
            )

            self.tiers.add_resources_to_dict(entry,
                                             start=usage.start,
                                             end=usage.end)

            o['resources'].append(entry)

        return o

    def _log_resource_usage(self,
                            prefix,
                            m_type,
                            duration,
                            cpu_percent,
                            cpu_times,
                            io,
                            extra_params=None):
        """Log a one-line summary of CPU and I/O usage at WARNING level.

        prefix starts the message and m_type is the structured log action
        name. extra_params, when given, is a mapping merged into the logged
        parameters. cpu_times is accepted for signature symmetry with the
        aggregate values but is not part of the message.
        """
        params = dict(
            duration=duration,
            cpu_percent=cpu_percent,
            io_reads=io.read_count,
            io_writes=io.write_count,
            io_read_bytes=io.read_bytes,
            io_write_bytes=io.write_bytes,
            io_read_time=io.read_time,
            io_write_time=io.write_time,
        )

        # A None default avoids sharing one mutable dict between calls.
        if extra_params:
            params.update(extra_params)

        message = prefix + ' - Wall time: {duration:.0f}s; ' \
            'CPU: {cpu_percent:.0f}%; ' \
            'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ' \
            'Read time: {io_read_time}; Write time: {io_write_time}'

        self.log(logging.WARNING, m_type, params, message)
Example #21
0
class BuildMonitor(MozbuildObject):
    """Monitors the output of the build."""
    def init(self, warnings_path):
        """Set up the monitor's state for a build.

        warnings_path is the path of the warnings database file. If the
        file exists it is loaded; if loading raises ValueError the file is
        removed instead.
        """
        self._warnings_path = warnings_path

        monitor = SystemResourceMonitor(poll_interval=1.0)
        self.resources = monitor
        self._resources_started = False

        self.tiers = TierStatus(monitor)

        database = WarningsDatabase()
        self.warnings_database = database
        if os.path.exists(warnings_path):
            try:
                database.load_from_file(warnings_path)
            except ValueError:
                # The existing file could not be loaded: delete it.
                os.remove(warnings_path)

        self._warnings_collector = WarningsCollector(
            database=database, objdir=self.topobjdir)

        self.build_objects = []

    def start(self):
        """Mark the beginning of the build.

        Stores the current wall-clock time and an initial Finder CPU sample.
        """
        began_at = time.time()
        self.start_time = began_at
        self._finder_start_cpu = self._get_finder_cpu_usage()

    def start_resource_recording(self):
        """Start the resource monitor and remember that it is running."""
        # This should be merged into start() once bug 892342 lands.
        monitor = self.resources
        monitor.start()
        self._resources_started = True

    def on_line(self, line):
        """Consume a line of output from the build system.

        This will parse the line for state and determine whether more action is
        needed.

        Returns a BuildOutputResult instance.

        In this named tuple, warning will be an object describing a new parsed
        warning. Otherwise it will be None.

        state_changed indicates whether the build system changed state with
        this line. If the build system changed state, the caller may want to
        query this instance for the current state in order to update UI, etc.

        for_display is a boolean indicating whether the line is relevant to the
        user. This is typically used to filter whether the line should be
        presented to the user.

        Raises Exception if a BUILDSTATUS line carries an unknown action.
        """
        if line.startswith('BUILDSTATUS'):
            args = line.split()[1:]

            action = args.pop(0)
            update_needed = True

            if action == 'TIERS':
                self.tiers.set_tiers(args)
                update_needed = False
            elif action == 'TIER_START':
                tier = args[0]
                self.tiers.begin_tier(tier)
            elif action == 'TIER_FINISH':
                tier, = args
                self.tiers.finish_tier(tier)
            elif action == 'OBJECT_FILE':
                self.build_objects.append(args[0])
                update_needed = False
            else:
                raise Exception('Unknown build status: %s' % action)

            return BuildOutputResult(None, update_needed, False)

        warning = None

        try:
            warning = self._warnings_collector.process_line(line)
        except Exception:
            # Warning parsing is best-effort: a failure must not break the
            # build. Catching Exception (not a bare except) still lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

        return BuildOutputResult(warning, False, True)

    def stop_resource_recording(self):
        """Stop the resource monitor if it was started and clear the flag."""
        was_running = self._resources_started
        if was_running:
            self.resources.stop()

        # The flag is reset whether or not the monitor had been running.
        self._resources_started = False

    def finish(self, record_usage=True):
        """Mark the end of the build and persist what was collected.

        Stops resource recording, stores the end time, the final Finder CPU
        sample and the elapsed time, then prunes and saves the warnings
        database. Unless record_usage is false, the resource usage is also
        logged and written to the 'build_resources.json' state file; any
        exception raised while doing so is logged as a warning instead of
        propagating.
        """
        self.stop_resource_recording()
        self.end_time = time.time()
        self._finder_end_cpu = self._get_finder_cpu_usage()
        self.elapsed = self.end_time - self.start_time

        database = self.warnings_database
        database.prune()
        database.save_to_file(self._warnings_path)

        if not record_usage:
            return

        try:
            usage = self.get_resource_usage()
            if usage:
                self.log_resource_usage(usage)
                state_path = self._get_state_filename('build_resources.json')
                with open(state_path, 'w') as fh:
                    json.dump(self.resources.as_dict(), fh, indent=2)
        except Exception as e:
            self.log(logging.WARNING, 'build_resources_error', {'msg': str(e)},
                     'Exception when writing resource usage file: {msg}')

    def _get_finder_cpu_usage(self):
        """Obtain the CPU usage of the Finder app on OS X.

        This is used to detect high CPU usage.
        """
        if not sys.platform.startswith('darwin'):
            return None

        if not psutil:
            return None

        for proc in psutil.process_iter():
            if proc.name != 'Finder':
                continue

            if proc.username != getpass.getuser():
                continue

            # Try to isolate system finder as opposed to other "Finder"
            # processes.
            if not proc.exe.endswith(
                    'CoreServices/Finder.app/Contents/MacOS/Finder'):
                continue

            return proc.get_cpu_times()

        return None

    def have_high_finder_usage(self):
        """Determine whether there was high Finder CPU usage during the build.

        Returns a 2-tuple (high, finder_percent). high is True if Finder
        used more than 25% of one core, False if it did not; finder_percent
        is the measured percentage. Both values are None if there is nothing
        to report: a Finder CPU sample is missing or the build took less
        than 15 seconds.
        """
        # We only measure if both samples exist and the measured range is
        # sufficiently long.
        if (not self._finder_start_cpu or self.elapsed < 15
                or not self._finder_end_cpu):
            return None, None

        def busy_seconds(sample):
            return sample.user + sample.system

        before = busy_seconds(self._finder_start_cpu)
        after = busy_seconds(self._finder_end_cpu)

        # If Finder used more than 25% of 1 core during the build, report an
        # error.
        finder_percent = (after - before) / self.elapsed * 100

        return finder_percent > 25, finder_percent

    def have_excessive_swapping(self):
        """Determine whether there was excessive swapping during the build.

        Returns a tuple of (excessive, swap_in, swap_out), where swap_in and
        swap_out are the sums of the per-measurement swap counters. All
        values are None if no resource usage is available.
        """
        if not self.have_resource_usage:
            return None, None, None

        measurements = self.resources.measurements
        swap_in = sum(m.swap.sin for m in measurements)
        swap_out = sum(m.swap.sout for m in measurements)

        # The threshold of 512 MB has been arbitrarily chosen.
        #
        # Choosing a proper value that is ideal for everyone is hard. We will
        # likely iterate on the logic until people are generally satisfied.
        # If a value is too low, the eventual warning produced does not carry
        # much meaning. If the threshold is too high, people may not see the
        # warning and the warning will thus be ineffective.
        threshold = 512 * 1048576
        excessive = swap_in > threshold or swap_out > threshold
        return excessive, swap_in, swap_out

    @property
    def have_resource_usage(self):
        """True when the resource monitor has a start time recorded."""
        started_at = self.resources.start_time
        return started_at is not None

    def get_resource_usage(self):
        """Produce a data structure containing the low-level resource usage information.

        This data structure can e.g. be serialized into JSON and saved for
        subsequent analysis.

        The result is a dict holding overall CPU and I/O aggregates, the
        tiered usage, one entry per range reported by the resource monitor
        under 'resources', the list of build objects and, when psutil is
        available, basic facts about the system.

        If no resource usage is available, None is returned.
        """
        if not self.have_resource_usage:
            return None

        cpu_percent = self.resources.aggregate_cpu_percent(phase=None,
                                                           per_cpu=False)
        cpu_times = self.resources.aggregate_cpu_times(phase=None,
                                                       per_cpu=False)
        io = self.resources.aggregate_io(phase=None)

        o = dict(version=3,
                 argv=sys.argv,
                 start=self.start_time,
                 end=self.end_time,
                 duration=self.end_time - self.start_time,
                 resources=[],
                 cpu_percent=cpu_percent,
                 cpu_times=cpu_times,
                 io=io,
                 objects=self.build_objects)

        o['tiers'] = self.tiers.tiered_resource_usage()

        self.tiers.add_resource_fields_to_dict(o)

        for usage in self.resources.range_usage():
            # Only the range bounds and memory samples are stored here; the
            # entry is then handed to the tier tracker to be augmented. No
            # per-range CPU aggregate is computed because nothing in this
            # method stores one.
            entry = dict(
                start=usage.start,
                end=usage.end,
                virt=list(usage.virt),
                swap=list(usage.swap),
            )

            self.tiers.add_resources_to_dict(entry,
                                             start=usage.start,
                                             end=usage.end)

            o['resources'].append(entry)

        # If the imports for this file ran before the in-tree virtualenv
        # was bootstrapped (for instance, for a clobber build in automation),
        # psutil might not be available.
        #
        # Treat psutil as optional to avoid an outright failure to log resources
        # TODO: it would be nice to collect data on the storage device as well
        # in this case.
        o['system'] = {}
        if psutil:
            o['system'].update(
                dict(
                    logical_cpu_count=psutil.cpu_count(),
                    physical_cpu_count=psutil.cpu_count(logical=False),
                    swap_total=psutil.swap_memory()[0],
                    vmem_total=psutil.virtual_memory()[0],
                ))

        return o

    def log_resource_usage(self, usage):
        """Summarize the resource usage of this build in a log message.

        usage is a mapping with a 'cpu_percent' value and an 'io' object
        exposing read/write byte and time counters; nothing is logged when
        it is empty or None. A second message with swap in/out in MB is
        logged when swap information is available and non-zero.
        """
        if not usage:
            return

        params = {
            'duration': self.end_time - self.start_time,
            'cpu_percent': usage['cpu_percent'],
        }
        io = usage['io']
        params.update(
            io_read_bytes=io.read_bytes,
            io_write_bytes=io.write_bytes,
            io_read_time=io.read_time,
            io_write_time=io.write_time,
        )

        message = ('Overall system resources - Wall time: {duration:.0f}s; '
                   'CPU: {cpu_percent:.0f}%; '
                   'Read bytes: {io_read_bytes}; '
                   'Write bytes: {io_write_bytes}; '
                   'Read time: {io_read_time}; '
                   'Write time: {io_write_time}')

        self.log(logging.WARNING, 'resource_usage', params, message)

        excessive, sin, sout = self.have_excessive_swapping()
        if excessive is None or not (sin or sout):
            return

        # Convert the swap counters to megabytes for display.
        swap_params = {
            'sin': sin / 1048576,
            'sout': sout / 1048576,
        }
        self.log(logging.WARNING, 'swap_activity', swap_params,
                 'Swap in/out (MB): {sin}/{sout}')

    def ccache_stats(self):
        """Return a CCacheStats built from the output of `ccache -s`.

        Returns None when which.which() cannot locate ccache, or when a
        ValueError is raised while collecting the stats (the error is
        logged as a warning).
        """
        try:
            ccache_path = which.which('ccache')
            raw_output = subprocess.check_output([ccache_path, '-s'])
            return CCacheStats(raw_output)
        except which.WhichError:
            pass
        except ValueError as e:
            self.log(logging.WARNING, 'ccache', {'msg': str(e)}, '{msg}')

        return None
Example #22
0
class ResourceMonitoringMixin(object):
    """Provides resource monitoring capabilities to scripts.

    When this class is in the inheritance chain, resource usage stats of the
    executing script will be recorded.

    This class requires the VirtualenvMixin in order to install a package used
    for recording resource usage.

    While we would like to record resource usage for the entirety of a script,
    since we require an external package, we can only record resource usage
    after that package is installed (as part of creating the virtualenv).
    That's just the way things have to be.
    """
    def __init__(self, *args, **kwargs):
        """Register the optional monitoring packages for the virtualenv."""
        super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)

        # Both packages are optional pip installs.
        for requirement in ('psutil>=3.1.1', 'mozsystemmonitor==0.3'):
            self.register_virtualenv_module(requirement, method='pip',
                                            optional=True)

        # Stays None until monitoring has been started.
        self._resource_monitor = None

    @PostScriptAction('create-virtualenv')
    def _start_resource_monitoring(self, action, success=None):
        """Activate the virtualenv and start the system resource monitor.

        action and success are accepted but not used here. Any exception
        raised while importing or starting the monitor is reported as a
        warning instead of propagating.
        """
        self.activate_virtualenv()

        # Resource Monitor requires Python 2.7, however it's currently optional.
        # Remove when all machines have had their Python version updated (bug 711299).
        python_too_old = sys.version_info[:2] < (2, 7)
        if python_too_old:
            self.warning('Resource monitoring will not be enabled! Python 2.7+ required.')
            return

        try:
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = monitor
            monitor.start()
        except Exception:
            self.warning("Unable to start resource monitor: %s" %
                         traceback.format_exc())

    @PreScriptAction
    def _resource_record_pre_action(self, action):
        """Begin a monitoring phase named after the given action."""
        # Resource monitor isn't available until after create-virtualenv.
        if self._resource_monitor:
            self._resource_monitor.begin_phase(action)

    @PostScriptAction
    def _resource_record_post_action(self, action, success=None):
        """Finish the monitoring phase named after the given action.

        success is accepted but not used here.
        """
        # Resource monitor isn't available until after create-virtualenv.
        if self._resource_monitor:
            self._resource_monitor.finish_phase(action)

    @PostScriptRun
    def _resource_record_post_run(self):
        """Stop monitoring, log the usage and write the raw data as JSON.

        Does nothing when no resource monitor was started. The raw data is
        written to 'resource-usage.json' in the 'abs_blob_upload_dir'
        directory. Exceptions are reported as warnings and never propagate.
        """
        if not self._resource_monitor:
            return

        # This should never raise an exception. This is a workaround until
        # mozsystemmonitor is fixed. See bug 895388.
        try:
            self._resource_monitor.stop()
            self._log_resource_usage()

            # Upload a JSON file containing the raw resource data.
            try:
                upload_dir = self.query_abs_dirs()['abs_blob_upload_dir']
                # json.dump() produces text, so the file is opened in text
                # mode; a binary-mode file rejects str on Python 3.
                with open(os.path.join(upload_dir, 'resource-usage.json'), 'w') as fh:
                    json.dump(self._resource_monitor.as_dict(), fh,
                              sort_keys=True, indent=4)
            except (AttributeError, KeyError):
                self.exception('could not upload resource usage JSON',
                               level=WARNING)

        except Exception:
            self.warning("Exception when reporting resource usage: %s" %
                         traceback.format_exc())

    def _log_resource_usage(self):
        rm = self._resource_monitor

        if rm.start_time is None:
            return

        def resources(phase):
            cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
            cpu_times = rm.aggregate_cpu_times(phase=phase, per_cpu=False)
            io = rm.aggregate_io(phase=phase)

            swap_in = sum(m.swap.sin for m in rm.measurements)
            swap_out = sum(m.swap.sout for m in rm.measurements)

            return cpu_percent, cpu_times, io, (swap_in, swap_out)

        def log_usage(prefix, duration, cpu_percent, cpu_times, io):
            message = '{prefix} - Wall time: {duration:.0f}s; ' \
                'CPU: {cpu_percent}; ' \
                'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ' \
                'Read time: {io_read_time}; Write time: {io_write_time}'

            # XXX Some test harnesses are complaining about a string being
            # being fed into a 'f' formatter. This will help diagnose the
            # issue.
            cpu_percent_str = str(round(cpu_percent)) + '%' if cpu_percent else "Can't collect data"

            try:
                self.info(
                    message.format(
                        prefix=prefix, duration=duration,
                        cpu_percent=cpu_percent_str, io_read_bytes=io.read_bytes,
                        io_write_bytes=io.write_bytes, io_read_time=io.read_time,
                        io_write_time=io.write_time
                    )
                )

            except ValueError:
                self.warning("Exception when formatting: %s" %
                             traceback.format_exc())

        cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
        duration = rm.end_time - rm.start_time

        log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)

        # Print special messages so usage shows up in Treeherder.
        if cpu_percent:
            self._tinderbox_print('CPU usage<br/>{:,.1f}%'.format(
                                  cpu_percent))

        self._tinderbox_print('I/O read bytes / time<br/>{:,} / {:,}'.format(
                              io.read_bytes, io.read_time))
        self._tinderbox_print('I/O write bytes / time<br/>{:,} / {:,}'.format(
                              io.write_bytes, io.write_time))

        # Print CPU components having >1%. "cpu_times" is a data structure
        # whose attributes are measurements. Ideally we'd have an API that
        # returned just the measurements as a dict or something.
        cpu_attrs = []
        for attr in sorted(dir(cpu_times)):
            if attr.startswith('_'):
                continue
            if attr in ('count', 'index'):
                continue
            cpu_attrs.append(attr)

        cpu_total = sum(getattr(cpu_times, attr) for attr in cpu_attrs)

        for attr in cpu_attrs:
            value = getattr(cpu_times, attr)
            percent = value / cpu_total * 100.0
            if percent > 1.00:
                self._tinderbox_print('CPU {}<br/>{:,.1f} ({:,.1f}%)'.format(
                                      attr, value, percent))

        # Swap on Windows isn't reported by psutil.
        if not self._is_windows():
            self._tinderbox_print('Swap in / out<br/>{:,} / {:,}'.format(
                                  swap_in, swap_out))

        for phase in rm.phases.keys():
            start_time, end_time = rm.phases[phase]
            cpu_percent, cpu_times, io, swap = resources(phase)
            log_usage(phase, end_time - start_time, cpu_percent, cpu_times, io)

    def _tinderbox_print(self, message):
        self.info('TinderboxPrint: %s' % message)
    def test_as_dict(self):
        """as_dict() exposes version 2 data with events, phases and totals."""
        monitor = SystemResourceMonitor(poll_interval=0.25)

        # Each step sleeps first, then issues its calls in order.
        script = (
            (0.1, (('begin_phase', 'phase1'), ('record_event', 'foo'))),
            (0.1, (('begin_phase', 'phase2'), ('record_event', 'bar'))),
            (0.2, (('finish_phase', 'phase1'),)),
            (0.2, (('finish_phase', 'phase2'),)),
        )

        monitor.start()
        for pause, calls in script:
            time.sleep(pause)
            for method_name, argument in calls:
                getattr(monitor, method_name)(argument)
        time.sleep(0.4)
        monitor.stop()

        snapshot = monitor.as_dict()

        self.assertEqual(snapshot['version'], 2)
        for key in ('events', 'phases'):
            self.assertEqual(len(snapshot[key]), 2)
        self.assertIn('system', snapshot)
        self.assertIsInstance(snapshot['system'], dict)
        overall = snapshot['overall']
        self.assertIsInstance(overall, dict)
        for key in ('duration', 'cpu_times'):
            self.assertIn(key, overall)
    def test_as_dict(self):
        """as_dict() exposes version 2 data with events, phases and totals."""
        monitor = SystemResourceMonitor(poll_interval=0.25)

        # Each step sleeps first, then issues its calls in order.
        script = (
            (0.1, (('begin_phase', 'phase1'), ('record_event', 'foo'))),
            (0.1, (('begin_phase', 'phase2'), ('record_event', 'bar'))),
            (0.2, (('finish_phase', 'phase1'),)),
            (0.2, (('finish_phase', 'phase2'),)),
        )

        monitor.start()
        for pause, calls in script:
            time.sleep(pause)
            for method_name, argument in calls:
                getattr(monitor, method_name)(argument)
        time.sleep(0.4)
        monitor.stop()

        snapshot = monitor.as_dict()

        self.assertEqual(snapshot['version'], 2)
        for key in ('events', 'phases'):
            self.assertEqual(len(snapshot[key]), 2)
        self.assertIn('system', snapshot)
        self.assertIsInstance(snapshot['system'], dict)
        overall = snapshot['overall']
        self.assertIsInstance(overall, dict)
        for key in ('duration', 'cpu_times'):
            self.assertIn(key, overall)
Example #25
0
class ResourceMonitoringMixin(PerfherderResourceOptionsMixin):
    """Provides resource monitoring capabilities to scripts.

    When this class is in the inheritance chain, resource usage stats of the
    executing script will be recorded.

    This class requires the VirtualenvMixin in order to install a package used
    for recording resource usage.

    While we would like to record resource usage for the entirety of a script,
    since we require an external package, we can only record resource usage
    after that package is installed (as part of creating the virtualenv).
    That's just the way things have to be.
    """
    def __init__(self, *args, **kwargs):
        """Register the packages needed for monitoring and reset the state."""
        super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)

        # functools32 is installed explicitly, because some slaves aren't
        # using a version of pip recent enough to install it automatically
        # with jsonschema (which depends on it)
        # https://github.com/Julian/jsonschema/issues/233
        requirements = (
            ('psutil>=3.1.1', {'optional': True}),
            ('mozsystemmonitor==0.3', {'optional': True}),
            ('jsonschema==2.5.1', {}),
            ('functools32==3.2.3-2', {}),
        )
        for requirement, extra in requirements:
            self.register_virtualenv_module(requirement, method='pip', **extra)

        self._resource_monitor = None

        # 2-tuple of (name, options) to assign Perfherder resource monitor
        # metrics to. This needs to be assigned by a script in order for
        # Perfherder metrics to be reported.
        self.resource_monitor_perfherder_id = None

    @PostScriptAction('create-virtualenv')
    def _start_resource_monitoring(self, action, success=None):
        """Activate the virtualenv and start the system resource monitor.

        action and success are accepted but not used here. Any exception
        raised while importing or starting the monitor is reported as a
        warning instead of propagating.
        """
        self.activate_virtualenv()

        # Resource Monitor requires Python 2.7, however it's currently optional.
        # Remove when all machines have had their Python version updated (bug 711299).
        python_too_old = sys.version_info[:2] < (2, 7)
        if python_too_old:
            self.warning('Resource monitoring will not be enabled! Python 2.7+ required.')
            return

        try:
            from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

            self.info("Starting resource monitoring.")
            monitor = SystemResourceMonitor(poll_interval=1.0)
            self._resource_monitor = monitor
            monitor.start()
        except Exception:
            self.warning("Unable to start resource monitor: %s" %
                         traceback.format_exc())

    @PreScriptAction
    def _resource_record_pre_action(self, action):
        """Begin a monitoring phase named after the given action."""
        # Resource monitor isn't available until after create-virtualenv.
        if self._resource_monitor:
            self._resource_monitor.begin_phase(action)

    @PostScriptAction
    def _resource_record_post_action(self, action, success=None):
        """Finish the monitoring phase named after the given action.

        success is accepted but not used here.
        """
        # Resource monitor isn't available until after create-virtualenv.
        if self._resource_monitor:
            self._resource_monitor.finish_phase(action)

    @PostScriptRun
    def _resource_record_post_run(self):
        """Stop monitoring, log the usage and write the raw data as JSON.

        Does nothing when no resource monitor was started. The raw data is
        written to 'resource-usage.json' in the 'abs_blob_upload_dir'
        directory, which is created if it does not exist. Exceptions are
        reported as warnings and never propagate.
        """
        if not self._resource_monitor:
            return

        # This should never raise an exception. This is a workaround until
        # mozsystemmonitor is fixed. See bug 895388.
        try:
            self._resource_monitor.stop()
            self._log_resource_usage()

            # Upload a JSON file containing the raw resource data.
            try:
                upload_dir = self.query_abs_dirs()['abs_blob_upload_dir']
                if not os.path.exists(upload_dir):
                    os.makedirs(upload_dir)
                # json.dump() produces text, so the file is opened in text
                # mode; a binary-mode file rejects str on Python 3.
                with open(os.path.join(upload_dir, 'resource-usage.json'), 'w') as fh:
                    json.dump(self._resource_monitor.as_dict(), fh,
                              sort_keys=True, indent=4)
            except (AttributeError, KeyError):
                self.exception('could not upload resource usage JSON',
                               level=WARNING)

        except Exception:
            self.warning("Exception when reporting resource usage: %s" %
                         traceback.format_exc())

    def _log_resource_usage(self):
        # Delay import because not available until virtualenv is populated.
        import jsonschema

        rm = self._resource_monitor

        if rm.start_time is None:
            return

        def resources(phase):
            cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
            cpu_times = rm.aggregate_cpu_times(phase=phase, per_cpu=False)
            io = rm.aggregate_io(phase=phase)

            swap_in = sum(m.swap.sin for m in rm.measurements)
            swap_out = sum(m.swap.sout for m in rm.measurements)

            return cpu_percent, cpu_times, io, (swap_in, swap_out)

        def log_usage(prefix, duration, cpu_percent, cpu_times, io):
            message = '{prefix} - Wall time: {duration:.0f}s; ' \
                'CPU: {cpu_percent}; ' \
                'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ' \
                'Read time: {io_read_time}; Write time: {io_write_time}'

            # XXX Some test harnesses are complaining about a string being
            # being fed into a 'f' formatter. This will help diagnose the
            # issue.
            cpu_percent_str = str(round(cpu_percent)) + '%' if cpu_percent else "Can't collect data"

            try:
                self.info(
                    message.format(
                        prefix=prefix, duration=duration,
                        cpu_percent=cpu_percent_str, io_read_bytes=io.read_bytes,
                        io_write_bytes=io.write_bytes, io_read_time=io.read_time,
                        io_write_time=io.write_time
                    )
                )

            except ValueError:
                self.warning("Exception when formatting: %s" %
                             traceback.format_exc())

        cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
        duration = rm.end_time - rm.start_time

        # Write out Perfherder data if configured.
        if self.resource_monitor_perfherder_id:
            perfherder_name, perfherder_options = self.resource_monitor_perfherder_id

            suites = []
            overall = []

            if cpu_percent:
                overall.append({
                    'name': 'cpu_percent',
                    'value': cpu_percent,
                })

            overall.extend([
                {'name': 'io_write_bytes', 'value': io.write_bytes},
                {'name': 'io.read_bytes', 'value': io.read_bytes},
                {'name': 'io_write_time', 'value': io.write_time},
                {'name': 'io_read_time', 'value': io.read_time},
            ])

            suites.append({
                'name': '%s.overall' % perfherder_name,
                'extraOptions': perfherder_options + self.perfherder_resource_options(),
                'subtests': overall,

            })

            for phase in rm.phases.keys():
                phase_duration = rm.phases[phase][1] - rm.phases[phase][0]
                subtests = [
                    {
                        'name': 'time',
                        'value': phase_duration,
                    }
                ]
                cpu_percent = rm.aggregate_cpu_percent(phase=phase,
                                                       per_cpu=False)
                if cpu_percent is not None:
                    subtests.append({
                        'name': 'cpu_percent',
                        'value': rm.aggregate_cpu_percent(phase=phase,
                                                          per_cpu=False),
                    })

                # We don't report I/O during each step because measured I/O
                # is system I/O and that I/O can be delayed (e.g. writes will
                # buffer before being flushed and recorded in our metrics).
                suites.append({
                    'name': '%s.%s' % (perfherder_name, phase),
                    'subtests': subtests,
                })

            data = {
                'framework': {'name': 'job_resource_usage'},
                'suites': suites,
            }

            schema_path = os.path.join(external_tools_path,
                                       'performance-artifact-schema.json')
            with open(schema_path, 'rb') as fh:
                schema = json.load(fh)

            # this will throw an exception that causes the job to fail if the
            # perfherder data is not valid -- please don't change this
            # behaviour, otherwise people will inadvertently break this
            # functionality
            self.info('Validating Perfherder data against %s' % schema_path)
            jsonschema.validate(data, schema)
            self.info('PERFHERDER_DATA: %s' % json.dumps(data))

        log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)

        # Print special messages so usage shows up in Treeherder.
        if cpu_percent:
            self._tinderbox_print('CPU usage<br/>{:,.1f}%'.format(
                                  cpu_percent))

        self._tinderbox_print('I/O read bytes / time<br/>{:,} / {:,}'.format(
                              io.read_bytes, io.read_time))
        self._tinderbox_print('I/O write bytes / time<br/>{:,} / {:,}'.format(
                              io.write_bytes, io.write_time))

        # Print CPU components having >1%. "cpu_times" is a data structure
        # whose attributes are measurements. Ideally we'd have an API that
        # returned just the measurements as a dict or something.
        cpu_attrs = []
        for attr in sorted(dir(cpu_times)):
            if attr.startswith('_'):
                continue
            if attr in ('count', 'index'):
                continue
            cpu_attrs.append(attr)

        cpu_total = sum(getattr(cpu_times, attr) for attr in cpu_attrs)

        for attr in cpu_attrs:
            value = getattr(cpu_times, attr)
            # cpu_total can be 0.0. Guard against division by 0.
            percent = value / cpu_total * 100.0 if cpu_total else 0.0

            if percent > 1.00:
                self._tinderbox_print('CPU {}<br/>{:,.1f} ({:,.1f}%)'.format(
                                      attr, value, percent))

        # Swap on Windows isn't reported by psutil.
        if not self._is_windows():
            self._tinderbox_print('Swap in / out<br/>{:,} / {:,}'.format(
                                  swap_in, swap_out))

        for phase in rm.phases.keys():
            start_time, end_time = rm.phases[phase]
            cpu_percent, cpu_times, io, swap = resources(phase)
            log_usage(phase, end_time - start_time, cpu_percent, cpu_times, io)

    def _tinderbox_print(self, message):
        self.info('TinderboxPrint: %s' % message)
    def test_as_dict(self):
        """as_dict() reports version 1 with both events and both phases."""
        monitor = SystemResourceMonitor(poll_interval=0.25)

        # Each step sleeps first, then issues its calls in order.
        script = (
            (0.1, (('begin_phase', 'phase1'), ('record_event', 'foo'))),
            (0.1, (('begin_phase', 'phase2'), ('record_event', 'bar'))),
            (0.2, (('finish_phase', 'phase1'),)),
            (0.2, (('finish_phase', 'phase2'),)),
        )

        monitor.start()
        for pause, calls in script:
            time.sleep(pause)
            for method_name, argument in calls:
                getattr(monitor, method_name)(argument)
        time.sleep(0.4)
        monitor.stop()

        snapshot = monitor.as_dict()

        self.assertEqual(snapshot['version'], 1)
        for key in ('events', 'phases'):
            self.assertEqual(len(snapshot[key]), 2)
Example #27
0
class BuildMonitor(MozbuildObject):
    """Monitors the output of the build."""

    def init(self, warnings_path):
        """Prepare resource monitoring and warnings tracking for a build.

        warnings_path is the path of the warnings database file. When the
        file exists it is loaded into the database; if loading raises
        ValueError the file is removed instead.
        """
        self._warnings_path = warnings_path

        # The resource monitor is only created here. Sampling begins when
        # start_resource_recording() is called.
        self.resources = SystemResourceMonitor(poll_interval=1.0)
        self._resources_started = False
        self.tiers = TierStatus(self.resources)

        self.warnings_database = WarningsDatabase()
        database_exists = os.path.exists(warnings_path)
        if database_exists:
            try:
                self.warnings_database.load_from_file(warnings_path)
            except ValueError:
                # The file could not be loaded; delete it rather than keep it.
                os.remove(warnings_path)

        collector = WarningsCollector(database=self.warnings_database,
                                      objdir=self.topobjdir)
        self._warnings_collector = collector

    def start(self):
        """Record the start of the build."""
        self.start_time = time.time()
        self._finder_start_cpu = self._get_finder_cpu_usage()

    def start_resource_recording(self):
        # This should be merged into start() once bug 892342 lands.
        self.resources.start()
        self._resources_started = True

    def on_line(self, line):
        """Consume a line of output from the build system.

        This will parse the line for state and determine whether more action is
        needed.

        Returns a BuildOutputResult instance.

        In this named tuple, warning will be an object describing a new parsed
        warning. Otherwise it will be None.

        state_changed indicates whether the build system changed state with
        this line. If the build system changed state, the caller may want to
        query this instance for the current state in order to update UI, etc.

        for_display is a boolean indicating whether the line is relevant to the
        user. This is typically used to filter whether the line should be
        presented to the user.
        """
        if line.startswith('BUILDSTATUS'):
            args = line.split()[1:]

            action = args.pop(0)
            update_needed = True

            if action == 'TIERS':
                self.tiers.set_tiers(args)
                update_needed = False
            elif action == 'TIER_START':
                tier = args[0]
                self.tiers.begin_tier(tier)
            elif action == 'TIER_FINISH':
                tier, = args
                self.tiers.finish_tier(tier)
            else:
                raise Exception('Unknown build status: %s' % action)

            return BuildOutputResult(None, update_needed, False)

        warning = None

        try:
            warning = self._warnings_collector.process_line(line)
        except:
            pass

        return BuildOutputResult(warning, False, True)

    def finish(self, record_usage=True):
        """Record the end of the build."""
        self.end_time = time.time()

        if self._resources_started:
            self.resources.stop()

        self._finder_end_cpu = self._get_finder_cpu_usage()
        self.elapsed = self.end_time - self.start_time

        self.warnings_database.prune()
        self.warnings_database.save_to_file(self._warnings_path)

        if not record_usage:
            return

        try:
            usage = self.record_resource_usage()
            if not usage:
                return

            with open(self._get_state_filename('build_resources.json'), 'w') as fh:
                json.dump(usage, fh, indent=2)
        except Exception as e:
            self.log(logging.WARNING, 'build_resources_error',
                {'msg': str(e)},
                'Exception when writing resource usage file: {msg}')

    def _get_finder_cpu_usage(self):
        """Obtain the CPU usage of the Finder app on OS X.

        This is used to detect high CPU usage.
        """
        if not sys.platform.startswith('darwin'):
            return None

        if not psutil:
            return None

        for proc in psutil.process_iter():
            if proc.name != 'Finder':
                continue

            if proc.username != getpass.getuser():
                continue

            # Try to isolate system finder as opposed to other "Finder"
            # processes.
            if not proc.exe.endswith('CoreServices/Finder.app/Contents/MacOS/Finder'):
                continue

            return proc.get_cpu_times()

        return None

    def have_high_finder_usage(self):
        """Determine whether there was high Finder CPU usage during the build.

        Returns True if there was high Finder CPU usage, False if there wasn't,
        or None if there is nothing to report.
        """
        if not self._finder_start_cpu:
            return None, None

        # We only measure if the measured range is sufficiently long.
        if self.elapsed < 15:
            return None, None

        if not self._finder_end_cpu:
            return None, None

        start = self._finder_start_cpu
        end = self._finder_end_cpu

        start_total = start.user + start.system
        end_total = end.user + end.system

        cpu_seconds = end_total - start_total

        # If Finder used more than 25% of 1 core during the build, report an
        # error.
        finder_percent = cpu_seconds / self.elapsed * 100

        return finder_percent > 25, finder_percent

    def have_excessive_swapping(self):
        """Determine whether there was excessive swapping during the build.

        Returns a tuple of (excessive, swap_in, swap_out). All values are None
        if no swap information is available.
        """
        if not self.have_resource_usage:
            return None, None, None

        swap_in = sum(m.swap.sin for m in self.resources.measurements)
        swap_out = sum(m.swap.sout for m in self.resources.measurements)

        # The threshold of 1024 MB has been arbitrarily chosen.
        #
        # Choosing a proper value that is ideal for everyone is hard. We will
        # likely iterate on the logic until people are generally satisfied.
        # If a value is too low, the eventual warning produced does not carry
        # much meaning. If the threshold is too high, people may not see the
        # warning and the warning will thus be ineffective.
        excessive = swap_in > 512 * 1048576 or swap_out > 512 * 1048576
        return excessive, swap_in, swap_out

    @property
    def have_resource_usage(self):
        """Whether resource usage is available."""
        return self.resources.start_time is not None

    def record_resource_usage(self):
        """Record the resource usage of this build.

        We write a log message containing a high-level summary. We also produce
        a data structure containing the low-level resource usage information.
        This data structure can e.g. be serialized into JSON and saved for
        subsequent analysis.

        If no resource usage is available, None is returned.
        """
        if not self.have_resource_usage:
            return None

        cpu_percent = self.resources.aggregate_cpu_percent(phase=None,
            per_cpu=False)
        cpu_times = self.resources.aggregate_cpu_times(phase=None,
            per_cpu=False)
        io = self.resources.aggregate_io(phase=None)

        self._log_resource_usage('Overall system resources', 'resource_usage',
            self.end_time - self.start_time, cpu_percent, cpu_times, io)

        excessive, sin, sout = self.have_excessive_swapping()
        if excessive is not None and (sin or sout):
            sin /= 1048576
            sout /= 1048576
            self.log(logging.WARNING, 'swap_activity',
                {'sin': sin, 'sout': sout},
                'Swap in/out (MB): {sin}/{sout}')

        o = dict(
            version=1,
            start=self.start_time,
            end=self.end_time,
            duration=self.end_time - self.start_time,
            resources=[],
            cpu_percent=cpu_percent,
            cpu_times=cpu_times,
            io=io,
        )

        o['tiers'] = self.tiers.tiered_resource_usage()

        self.tiers.add_resource_fields_to_dict(o)

        for usage in self.resources.range_usage():
            cpu_percent = self.resources.aggregate_cpu_percent(usage.start,
                usage.end, per_cpu=False)
            cpu_times = self.resources.aggregate_cpu_times(usage.start,
                usage.end, per_cpu=False)

            entry = dict(
                start=usage.start,
                end=usage.end,
                virt=list(usage.virt),
                swap=list(usage.swap),
            )

            self.tiers.add_resources_to_dict(entry, start=usage.start,
                    end=usage.end)

            o['resources'].append(entry)

        return o

    def _log_resource_usage(self, prefix, m_type, duration, cpu_percent,
        cpu_times, io, extra_params={}):

        params = dict(
            duration=duration,
            cpu_percent=cpu_percent,
            io_reads=io.read_count,
            io_writes=io.write_count,
            io_read_bytes=io.read_bytes,
            io_write_bytes=io.write_bytes,
            io_read_time=io.read_time,
            io_write_time=io.write_time,
        )

        params.update(extra_params)

        message = prefix + ' - Wall time: {duration:.0f}s; ' \
            'CPU: {cpu_percent:.0f}%; ' \
            'Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; ' \
            'Read time: {io_read_time}; Write time: {io_write_time}'

        self.log(logging.WARNING, m_type, params, message)
Example #28
0
    def test_as_dict(self):
        monitor = SystemResourceMonitor(poll_interval=0.25)

        monitor.start()
        time.sleep(0.1)
        monitor.begin_phase("phase1")
        monitor.record_event("foo")
        time.sleep(0.1)
        monitor.begin_phase("phase2")
        monitor.record_event("bar")
        time.sleep(0.2)
        monitor.finish_phase("phase1")
        time.sleep(0.2)
        monitor.finish_phase("phase2")
        time.sleep(0.4)
        monitor.stop()

        d = monitor.as_dict()

        self.assertEqual(d["version"], 2)
        self.assertEqual(len(d["events"]), 2)
        self.assertEqual(len(d["phases"]), 2)
        self.assertIn("system", d)
        self.assertIsInstance(d["system"], dict)
        self.assertIsInstance(d["overall"], dict)
        self.assertIn("duration", d["overall"])
        self.assertIn("cpu_times", d["overall"])