Example #1
    def _fix_indexes(self, data_frame):
        """
        In case of multiple traces with different indexes (i.e. x-axis values),
        create new ones that share a common index
        """
        # 1) Check if we are processing multiple traces
        if len(data_frame) > 1:
            # 2) Merge the data frames to obtain common indexes
            df_columns = list(data_frame.keys())
            dedup_data = [handle_duplicate_index(s) for s in data_frame.values()]
            data_frame = pd.Series(dedup_data, index=df_columns)
            # .tolist() yields the list of Series for concatenation;
            # Series.get_values() is gone from modern pandas
            merged_df = pd.concat(data_frame.tolist(), axis=1)
            merged_df.columns = df_columns
            # 3) Fill NaN values depending on drawstyle
            if self._attr["drawstyle"] == "steps-post":
                merged_df = merged_df.ffill()
            elif self._attr["drawstyle"] == "steps-pre":
                merged_df = merged_df.bfill()
            elif self._attr["drawstyle"] == "steps-mid":
                merged_df = merged_df.ffill()
            else:
                # default
                merged_df = merged_df.interpolate()

            return merged_df
        else:
            return data_frame
Example #2
    def _fix_indexes(self, data_dict):
        """
        In case of multiple traces with different indexes (i.e. x-axis values),
        create new ones that share a common index
        """
        # 1) Check if we are processing multiple traces
        if len(data_dict) <= 1:
            raise ValueError("Cannot fix indexes for single trace. "
                             "Expecting multiple traces!")

        # 2) Merge the data frames to obtain common indexes
        df_columns = list(data_dict.keys())
        dedup_data = [handle_duplicate_index(s) for s in data_dict.values()]
        ret = pd.Series(dedup_data, index=df_columns)
        # .tolist() yields the list of Series for concatenation;
        # Series.get_values() is gone from modern pandas
        merged_df = pd.concat(ret.tolist(), axis=1)
        merged_df.columns = df_columns
        # 3) Fill NaN values depending on drawstyle
        if self._attr["drawstyle"] == "steps-post":
            merged_df = merged_df.ffill()
        elif self._attr["drawstyle"] == "steps-pre":
            merged_df = merged_df.bfill()
        elif self._attr["drawstyle"] == "steps-mid":
            merged_df = merged_df.ffill()
        else:
            # default
            merged_df = merged_df.interpolate()

        return merged_df
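A minimal sketch of the input _fix_indexes expects and the merge it performs: a dict of per-trace Series whose indexes (x-axis values) differ, concatenated onto their union index and then filled according to drawstyle. The trace names and values below are hypothetical:

    import pandas as pd

    traces = {
        "trace_a": pd.Series([0, 1], index=[0.0, 0.5]),
        "trace_b": pd.Series([5, 6], index=[0.2, 0.7]),
    }
    merged = pd.concat(traces, axis=1)  # union index [0.0, 0.2, 0.5, 0.7]
    merged = merged.ffill()             # "steps-post"; other drawstyles use
                                        # bfill() or interpolate()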
Example #3
    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""

        data_frame = self._get_data_frame(cls)
        if data_frame.empty:
            raise ValueError("No events found for {}".format(cls.name))

        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
                         [column]]}, axis=1).swaplevel(0, 1, axis=1)
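To see the shape the final concat/swaplevel produces, a small sketch with hypothetical per-CPU frames: the pivot values end up as the inner column level, the measured column as the outer one.

    import pandas as pd

    data = {
        0: pd.DataFrame({"util": [10, 20]}, index=[0.0, 1.0]),
        1: pd.DataFrame({"util": [30, 40]}, index=[0.0, 1.0]),
    }
    out = pd.concat(data, axis=1).swaplevel(0, 1, axis=1)
    # out.columns -> MultiIndex([('util', 0), ('util', 1)])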
Example #4
    def signal_cpu_active(self, cpu):
        """
        Build a square wave representing the active (i.e. non-idle) CPU time

        :param cpu: CPU ID
        :type cpu: int

        :returns: A :class:`pandas.Series` that equals 1 at timestamps where the
          CPU is reported to be non-idle, 0 otherwise
        """
        idle_df = self.trace.df_events('cpu_idle')
        cpu_df = idle_df[idle_df.cpu_id == cpu]

        # On cpu_idle events, state == -1 marks the CPU leaving idle
        cpu_active = cpu_df.state.apply(lambda s: 1 if s == -1 else 0)

        start_time = self.trace.start

        if cpu_active.empty:
            cpu_active = pd.Series([0], index=[start_time])
        elif cpu_active.index[0] != start_time:
            entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[start_time])
            cpu_active = pd.concat([entry_0, cpu_active])

        # Fix sequences of wakeup/sleep events reported with the same index
        return handle_duplicate_index(cpu_active)
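A worked illustration of the square-wave construction, with hypothetical cpu_idle samples (state == -1 marks the exit from idle):

    import pandas as pd

    state = pd.Series([-1, 2, -1, 1], index=[1.0, 2.5, 4.0, 5.5])
    cpu_active = state.apply(lambda s: 1 if s == -1 else 0)
    # The trace starts at t=0.0, before the first event, so prepend the
    # inverse of the first sample to define the signal from the start
    entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[0.0])
    cpu_active = pd.concat([entry_0, cpu_active])
    # -> 0 at 0.0, 1 at 1.0, 0 at 2.5, 1 at 4.0, 0 at 5.5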
Example #5
    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""

        data_frame = self._get_data_frame(cls)
        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
                         [column]]}, axis=1).swaplevel(0, 1, axis=1)
Example #6
    def test_handle_duplicate_index_duplicate_end(self):
        """handle_duplicate_index copes with duplicates at the end of the series"""

        max_delta = 0.001
        values = [0, 1, 2, 3, 4]
        index = [0.0, 1.0, 2.0, 6.0, 6.0]
        expected_index = index[:]
        expected_index[-1] += max_delta
        series = pandas.Series(values, index=index)
        expected_series = pandas.Series(values, index=expected_index)

        series = utils.handle_duplicate_index(series, max_delta)
        assert_series_equal(series, expected_series)
Example #7
    def test_handle_duplicate_index(self):
        """Test Util Function: handle_duplicate_index
        """

        # Refer to the example in the function doc string
        values = [0, 1, 2, 3, 4]
        index = [0.0, 1.0, 1.0, 6.0, 7.0]
        series = pandas.Series(values, index=index)
        new_index = [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0]

        with self.assertRaises(ValueError):
            series.reindex(new_index)

        max_delta = 0.001
        expected_index = [0.0, 1.0, 1 + max_delta, 6.0, 7.0]
        expected_series = pandas.Series(values, index=expected_index)
        series = utils.handle_duplicate_index(series, max_delta)
        assert_series_equal(series, expected_series)

        # Make sure that the reindex doesn't raise ValueError any more
        series.reindex(new_index)
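The two tests above pin down the behaviour every example relies on: duplicated index values are nudged forward by at most max_delta so the index becomes unique and reindex() stops raising ValueError. A minimal sketch that satisfies both tests (the shipped trappy/LISA helper also spreads runs of duplicates evenly into the gap before the next distinct value; this simplified version is an assumption, not the real code):

    import pandas as pd

    def handle_duplicate_index(data, max_delta=0.000001):
        """Return a copy of `data` with a strictly increasing index,
        shifting each duplicate at most max_delta past its predecessor."""
        values = data.index.values.astype(float)
        for i in range(1, len(values)):
            if values[i] <= values[i - 1]:
                values[i] = values[i - 1] + max_delta
        data = data.copy()
        data.index = pd.Index(values, name=data.index.name)
        return data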
Example #8
File: trace.py Project: credp/lisa
    def getCPUActiveSignal(self, cpu):
        """
        Build a square wave representing the active (i.e. non-idle) CPU time,
        i.e.:

          cpu_active[t] == 1 if the CPU is reported to be non-idle by cpuidle at
          time t
          cpu_active[t] == 0 otherwise

        :param cpu: CPU ID
        :type cpu: int

        :returns: A :mod:`pandas.Series` or ``None`` if the trace contains no
                  "cpu_idle" events
        """
        if not self.hasEvents('cpu_idle'):
            self._log.warning('Events [cpu_idle] not found, '
                              'cannot compute CPU active signal!')
            return None

        idle_df = self._dfg_trace_event('cpu_idle')
        cpu_df = idle_df[idle_df.cpu_id == cpu]

        cpu_active = cpu_df.state.apply(
            lambda s: 1 if s == NON_IDLE_STATE else 0
        )

        start_time = 0.0
        if not self.ftrace.normalized_time:
            start_time = self.ftrace.basetime

        if cpu_active.empty:
            cpu_active = pd.Series([0], index=[start_time])
        elif cpu_active.index[0] != start_time:
            entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[start_time])
            cpu_active = pd.concat([entry_0, cpu_active])

        # Fix sequences of wakeup/sleep events reported with the same index
        return handle_duplicate_index(cpu_active)
Example #9
    def create_dataframe(self):
        """Create the final :mod:`pandas.DataFrame`"""
        if not self.time_array:
            return

        trace_arr_lengths = self.__get_trace_array_lengths()

        if trace_arr_lengths:
            for (idx, val) in enumerate(self.data_array):
                expl_val = trace_parser_explode_array(val, trace_arr_lengths)
                self.data_array[idx] = expl_val

        time_idx = pd.Index(self.time_array, name="Time")
        self.data_frame = pd.DataFrame(self.generate_parsed_data(),
                                       index=time_idx)
        self.data_frame = handle_duplicate_index(self.data_frame)
        self.optimize_dataframe()

        self.time_array = []
        self.line_array = []
        self.comm_array = []
        self.pid_array = []
        self.cpu_array = []
        self.data_array = []
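The helper is applied to a whole DataFrame here, not just a Series; a hypothetical illustration of the effect on the "Time" index (using the sketch of handle_duplicate_index shown earlier, with its default max_delta):

    import pandas as pd

    time_idx = pd.Index([0.0, 0.1, 0.1, 0.2], name="Time")
    df = pd.DataFrame({"load": [1, 2, 3, 4]}, index=time_idx)
    df = handle_duplicate_index(df)
    # df.index -> [0.0, 0.1, 0.100001, 0.2], strictly increasing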
Example #10
    def _get_trace_metrics(self, trace_path):
        """
        Parse a trace (or use cached results) and extract extra metrics from it

        Returns a DataFrame with columns:

        metric,value,units
        """
        cache_path = os.path.join(os.path.dirname(trace_path), 'lisa_trace_metrics.csv')
        if self.use_cached_trace_metrics and os.path.exists(cache_path):
            return pd.read_csv(cache_path)

        # I wonder if this should go in LISA itself? Probably.

        metrics = []
        events = ['irq_handler_entry', 'cpu_frequency', 'nohz_kick', 'sched_switch',
                  'sched_load_cfs_rq', 'sched_load_avg_task', 'thermal_temperature']
        trace = Trace(self.platform, trace_path, events)

        metrics.append(('cpu_wakeup_count', len(trace.data_frame.cpu_wakeups()), None))

        # Helper to get area under curve of multiple CPU active signals
        def get_cpu_time(trace, cpus):
            df = pd.DataFrame([trace.getCPUActiveSignal(cpu) for cpu in cpus])
            return df.sum(axis=1).sum(axis=0)

        clusters = trace.platform.get('clusters')
        if clusters:
            for cluster in clusters.values():
                name = '-'.join(str(c) for c in cluster)

                df = trace.data_frame.cluster_frequency_residency(cluster)
                if df is None or df.empty:
                    self._log.warning("Can't get cluster freq residency from %s",
                                      trace.data_dir)
                else:
                    df = df.reset_index()
                    avg_freq = (df.frequency * df.time).sum() / df.time.sum()
                    metric = 'avg_freq_cluster_{}'.format(name)
                    metrics.append((metric, avg_freq, 'MHz'))

                df = trace.data_frame.trace_event('cpu_frequency')
                df = df[df.cpu == cluster[0]]
                metrics.append(('freq_transition_count_{}'.format(name), len(df), None))

                active_time = area_under_curve(trace.getClusterActiveSignal(cluster))
                metrics.append(('active_time_cluster_{}'.format(name),
                                active_time, 'seconds'))

                metrics.append(('cpu_time_cluster_{}'.format(name),
                                get_cpu_time(trace, cluster), 'cpu-seconds'))

        metrics.append(('cpu_time_total',
                        get_cpu_time(trace, range(trace.platform['cpus_count'])),
                        'cpu-seconds'))

        event = None
        if trace.hasEvents('sched_load_cfs_rq'):
            event = 'sched_load_cfs_rq'
            row_filter = lambda r: r.path == '/'
            column = 'util'
        elif trace.hasEvents('sched_load_avg_cpu'):
            event = 'sched_load_avg_cpu'
            # Mask selecting every row: df[callable] needs an array-like
            # result, not a bare True
            row_filter = lambda r: pd.Series(True, index=r.index)
            column = 'util_avg'
        if event:
            df = trace.data_frame.trace_event(event)
            util_sum = (handle_duplicate_index(df)[row_filter]
                        .pivot(columns='cpu')[column].ffill().sum(axis=1))
            avg_util_sum = area_under_curve(util_sum) / (util_sum.index[-1] - util_sum.index[0])
            metrics.append(('avg_util_sum', avg_util_sum, None))

        if trace.hasEvents('thermal_temperature'):
            df = trace.data_frame.trace_event('thermal_temperature')
            for zone, zone_df in df.groupby('thermal_zone'):
                metrics.append(('tz_{}_start_temp'.format(zone),
                                zone_df.iloc[0]['temp_prev'],
                                'milliCelsius'))

                if len(zone_df) == 1:  # Avoid division by 0
                    avg_tmp = zone_df['temp'].iloc[0]
                else:
                    avg_tmp = (area_under_curve(zone_df['temp'])
                               / (zone_df.index[-1] - zone_df.index[0]))

                metrics.append(('tz_{}_avg_temp'.format(zone),
                                avg_tmp,
                                'milliCelsius'))

        ret = pd.DataFrame(metrics, columns=['metric', 'value', 'units'])
        ret.to_csv(cache_path, index=False)

        return ret
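avg_util_sum is the time-weighted mean of the summed utilization signal: area under the curve divided by the trace duration. A numeric sketch, assuming area_under_curve performs a trapezoidal integral over the time index (the real LISA helper may integrate the step signal differently):

    import numpy as np
    import pandas as pd

    def area_under_curve(series):
        # Hypothetical stand-in for LISA's helper: trapezoidal integration
        return np.trapz(series.values, series.index.values)

    util_sum = pd.Series([100.0, 100.0, 200.0], index=[0.0, 1.0, 2.0])
    avg = area_under_curve(util_sum) / (util_sum.index[-1] - util_sum.index[0])
    # (100 + 150) / 2.0 -> 125.0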
Example #11
    def _handle_duplicate_index(self):
        """Handle duplicate values in index"""

        self._data = handle_duplicate_index(self._data)
        self._dup_resolved = True
Example #12
    def get_trace_metrics(self, trace_path):
        cache_path = os.path.join(os.path.dirname(trace_path),
                                  'lisa_trace_metrics.csv')
        if self.use_cached_trace_metrics and os.path.exists(cache_path):
            return pd.read_csv(cache_path)

        # I wonder if this should go in LISA itself? Probably.

        metrics = []
        events = [
            'irq_handler_entry', 'cpu_frequency', 'nohz_kick', 'sched_switch',
            'sched_load_cfs_rq', 'sched_load_avg_task'
        ]
        trace = Trace(self.platform, trace_path, events)

        if hasattr(trace.data_frame, 'cpu_wakeups'):  # Not merged in LISA yet
            metrics.append(('cpu_wakeup_count',
                            len(trace.data_frame.cpu_wakeups()), None))

        # Helper to get area under curve of multiple CPU active signals
        def get_cpu_time(trace, cpus):
            df = pd.DataFrame([trace.getCPUActiveSignal(cpu) for cpu in cpus])
            return df.sum(axis=1).sum(axis=0)

        clusters = trace.platform.get('clusters')
        if clusters:
            for cluster in clusters.values():
                name = '-'.join(str(c) for c in cluster)

                df = trace.data_frame.cluster_frequency_residency(cluster)
                if df is None or df.empty:
                    print "Can't get cluster freq residency from {}".format(
                        trace.data_dir)
                else:
                    df = df.reset_index()
                    avg_freq = (df.frequency * df.time).sum() / df.time.sum()
                    metric = 'avg_freq_cluster_{}'.format(name)
                    metrics.append((metric, avg_freq, 'MHz'))

                df = trace.data_frame.trace_event('cpu_frequency')
                df = df[df.cpu == cluster[0]]
                metrics.append(
                    ('freq_transition_count_{}'.format(name), len(df), None))

                active_time = area_under_curve(
                    trace.getClusterActiveSignal(cluster))
                metrics.append(('active_time_cluster_{}'.format(name),
                                active_time, 'seconds'))

                metrics.append(('cpu_time_cluster_{}'.format(name),
                                get_cpu_time(trace, cluster), 'cpu-seconds'))

        metrics.append(
            ('cpu_time_total',
             get_cpu_time(trace,
                          range(trace.platform['cpus_count'])), 'cpu-seconds'))

        event = None
        if trace.hasEvents('sched_load_cfs_rq'):
            event = 'sched_load_cfs_rq'
            row_filter = lambda r: r.path == '/'
            column = 'util'
        elif trace.hasEvents('sched_load_avg_cpu'):
            event = 'sched_load_avg_cpu'
            # Mask selecting every row: df[callable] needs an array-like
            # result, not a bare True
            row_filter = lambda r: pd.Series(True, index=r.index)
            column = 'util_avg'
        if event:
            df = trace.data_frame.trace_event(event)
            util_sum = (handle_duplicate_index(df)[row_filter].pivot(
                columns='cpu')[column].ffill().sum(axis=1))
            avg_util_sum = area_under_curve(util_sum) / (util_sum.index[-1] -
                                                         util_sum.index[0])
            metrics.append(('avg_util_sum', avg_util_sum, None))

        if trace.hasEvents('nohz_kick'):
            metrics.append(
                ('nohz_kick_count',
                 len(trace.data_frame.trace_event('nohz_kick')), None))

        ret = pd.DataFrame(metrics, columns=['metric', 'value', 'units'])
        if self.use_cached_trace_metrics:
            ret.to_csv(cache_path, index=False)

        return ret
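The get_cpu_time helper stacks one active signal per CPU as DataFrame rows and then sums every sample; a hypothetical shape check:

    import pandas as pd

    sig0 = pd.Series([1, 0], index=[0.0, 1.0])  # e.g. getCPUActiveSignal(0)
    sig1 = pd.Series([0, 1], index=[0.0, 1.0])  # e.g. getCPUActiveSignal(1)
    df = pd.DataFrame([sig0, sig1])     # 2 rows (CPUs) x 2 columns (times)
    total = df.sum(axis=1).sum(axis=0)  # -> 2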