def test_invalid_batch(self):
  batch = PROTO_FACTORY.CellsBatch()

  # Since the batch isn't marked as the last batch, the QueryResultIterator
  # expects another batch and thus raises IndexError as no next batch exists.
  with self.assertRaises(IndexError):
    qr_iterator = TraceProcessor.QueryResultIterator([], [batch])
def test_null_cells_as_pandas(self):
  int_values = [100, 200, 300, 500, 600]
  str_values = ['bar1', 'bar2', 'bar3']

  batch = PROTO_FACTORY.CellsBatch()
  batch.cells.extend([
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_VARINT,
  ])
  batch.varint_cells.extend(int_values)
  batch.string_cells = "\0".join(str_values) + "\0"
  batch.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator(
      ['foo_id', 'foo_num', 'foo_num_2'], [batch])
  qr_df = qr_iterator.as_pandas_dataframe()

  # Any cell (and thus column in a row) can be set to null.
  # In this query result, foo_num_2 of row 2 was set to null.
  # Test to see that all the rows are still returned correctly.
  int_values_check = [100, 200, 300, None, 500, 600]
  for num, row in qr_df.iterrows():
    self.assertEqual(row['foo_id'], str_values[num])
    self.assertEqual(row['foo_num'], int_values_check[num * 2])
    self.assertEqual(row['foo_num_2'], int_values_check[num * 2 + 1])
def test_one_batch_as_pandas(self):
  int_values = [100, 200]
  str_values = ['bar1', 'bar2']

  batch = PROTO_FACTORY.CellsBatch()
  batch.cells.extend([
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
  ])
  batch.varint_cells.extend(int_values)
  batch.string_cells = "\0".join(str_values) + "\0"
  batch.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator(
      ['foo_id', 'foo_num', 'foo_null'], [batch])
  qr_df = qr_iterator.as_pandas_dataframe()

  for num, row in qr_df.iterrows():
    self.assertEqual(row['foo_id'], str_values[num])
    self.assertEqual(row['foo_num'], int_values[num])
    self.assertEqual(row['foo_null'], None)
def test_empty_batch(self):
  batch = PROTO_FACTORY.CellsBatch()
  batch.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator([], [batch])

  # The batch contains no cells, so iteration yields no rows and the loop
  # body below should never execute.
  for num, row in enumerate(qr_iterator):
    self.assertIsNone(row.foo_id)
    self.assertIsNone(row.foo_num)
def test_empty_batch_as_pandas(self):
  batch = PROTO_FACTORY.CellsBatch()
  batch.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator([], [batch])
  qr_df = qr_iterator.as_pandas_dataframe()

  # The batch contains no cells, so the dataframe is empty and the loop body
  # below should never execute (str_values and int_values are intentionally
  # left undefined here).
  for num, row in qr_df.iterrows():
    self.assertEqual(row['foo_id'], str_values[num])
    self.assertEqual(row['foo_num'], int_values[num])
def _create_tp(
    self, trace: ResolverRegistry.Result
) -> Optional[Tuple[TraceProcessor, Metadata]]:
  try:
    return TraceProcessor(
        trace=trace.generator, config=self.config.tp_config), trace.metadata
  except TraceProcessorException as ex:
    if self.config.load_failure_handling == FailureHandling.RAISE_EXCEPTION:
      raise ex
    self._stats.load_failures += 1
    return None
def test_incorrect_columns_batch(self):
  batch = PROTO_FACTORY.CellsBatch()
  batch.cells.extend([
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_VARINT,
  ])
  batch.varint_cells.extend([100, 200])
  batch.is_last_batch = True

  # The number of cells should always be a multiple of the number of columns.
  # Here it clearly isn't, so a TraceProcessorException is raised during the
  # data integrity check in the constructor.
  with self.assertRaises(TraceProcessorException):
    qr_iterator = TraceProcessor.QueryResultIterator(
        ['foo_id', 'foo_num', 'foo_dur', 'foo_ms'], [batch])
def compute_breakdown_for_startup(tp: TraceProcessor,
                                  package_name=None,
                                  process_name=None):
  """Computes the slice breakdown (like |compute_breakdown|) but only
  considering slices which happened during an app startup.

  Args:
    tp: the trace processor instance to query.
    package_name: optional package name to filter for startups. Only a
        single startup matching this package name should be present. If not
        specified, only a single startup of any app should be in the trace.
    process_name: optional process name to filter for slices; specifying
        this argument can make computing the breakdown a lot faster.

  Returns:
    The same as |compute_breakdown| but only containing slices which
    happened during app startup.
  """
  # Computing the android_startup metric populates the launches table.
  tp.metric(['android_startup'])

  # Verify there was only one startup in the trace matching the package name.
  filter = "WHERE package = '{}'".format(package_name) if package_name else ''
  launches = tp.query('''
    SELECT ts, ts_end, dur
    FROM launches
    {}
  '''.format(filter)).as_pandas_dataframe()
  if len(launches) == 0:
    raise TraceProcessorException("Didn't find startup in trace")
  if len(launches) > 1:
    raise TraceProcessorException("Found multiple startups in trace")

  start = launches['ts'][0]
  end = launches['ts_end'][0]

  return compute_breakdown(tp, start, end, process_name)
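# A hedged usage sketch, not part of the original sources: the trace path
# 'startup_trace.pftrace' and the package 'com.example.app' are illustrative
# placeholders, and TraceProcessor is the same class the function above uses.
def _example_startup_breakdown():
  tp = TraceProcessor(trace='startup_trace.pftrace')  # hypothetical trace path
  try:
    # Passing process_name as well can make the breakdown query much faster.
    return compute_breakdown_for_startup(
        tp, package_name='com.example.app', process_name='com.example.app')
  finally:
    tp.close()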
def test_invalid_cell_type_as_pandas(self):
  batch = PROTO_FACTORY.CellsBatch()
  batch.cells.extend([
      TestQueryResultIterator.CELL_INVALID,
      TestQueryResultIterator.CELL_VARINT,
  ])
  batch.varint_cells.extend([100, 200])
  batch.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                   [batch])

  # In this batch we declare the column types to be CELL_INVALID and
  # CELL_VARINT, but that doesn't match the data, which are both ints, so a
  # TraceProcessorException should be raised.
  with self.assertRaises(TraceProcessorException):
    _ = qr_iterator.as_pandas_dataframe()
def test_incorrect_cells_batch_as_pandas(self):
  str_values = ['bar1', 'bar2']

  batch = PROTO_FACTORY.CellsBatch()
  batch.cells.extend([
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
  ])
  batch.string_cells = "\0".join(str_values) + "\0"
  batch.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                   [batch])

  # The batch specifies there ought to be 2 cells of type STRING and 2 cells
  # of type VARINT, but no varint cells are defined in the batch. Thus an
  # IndexError occurs as it tries to access the empty varint cells list.
  with self.assertRaises(IndexError):
    _ = qr_iterator.as_pandas_dataframe()
def test_many_batches(self):
  int_values = [100, 200, 300, 400]
  str_values = ['bar1', 'bar2', 'bar3', 'bar4']

  batch_1 = PROTO_FACTORY.CellsBatch()
  batch_1.cells.extend([
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
  ])
  batch_1.varint_cells.extend(int_values[:2])
  batch_1.string_cells = "\0".join(str_values[:2]) + "\0"
  batch_1.is_last_batch = False

  batch_2 = PROTO_FACTORY.CellsBatch()
  batch_2.cells.extend([
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
      TestQueryResultIterator.CELL_STRING,
      TestQueryResultIterator.CELL_VARINT,
      TestQueryResultIterator.CELL_NULL,
  ])
  batch_2.varint_cells.extend(int_values[2:])
  batch_2.string_cells = "\0".join(str_values[2:]) + "\0"
  batch_2.is_last_batch = True

  qr_iterator = TraceProcessor.QueryResultIterator(
      ['foo_id', 'foo_num', 'foo_null'], [batch_1, batch_2])

  for num, row in enumerate(qr_iterator):
    self.assertEqual(row.foo_id, str_values[num])
    self.assertEqual(row.foo_num, int_values[num])
    self.assertEqual(row.foo_null, None)
def create_tp(trace: TraceReference):
  return TraceProcessor(
      trace=trace,
      config=TraceProcessorConfig(bin_path=os.environ["SHELL_PATH"]))
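# A hedged usage sketch, not part of the original snippet: 'example.pftrace'
# is a placeholder path, and the SHELL_PATH environment variable must already
# point at a trace_processor_shell binary for create_tp to succeed.
def _example_create_tp():
  tp = create_tp('example.pftrace')
  slice_count = tp.query(
      'SELECT COUNT(1) AS cnt FROM slice').as_pandas_dataframe()
  tp.close()
  return slice_count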
def compute_breakdown(tp: TraceProcessor,
                      start_ts=None,
                      end_ts=None,
                      process_name=None):
  """For each userspace slice in the trace processor instance |tp|, computes
  the self-time of that slice grouping by process name, thread name
  and thread state.

  Args:
    tp: the trace processor instance to query.
    start_ts: optional bound to only consider slices after this ts
    end_ts: optional bound to only consider slices until this ts
    process_name: optional process name to filter for slices; specifying
        this argument can make computing the breakdown a lot faster.

  Returns:
    A Pandas dataframe containing the total self time taken by a slice stack
    broken down by process name, thread name and thread state.
  """
  bounds = tp.query('SELECT * FROM trace_bounds').as_pandas_dataframe()
  start_ts = start_ts if start_ts else bounds['start_ts'][0]
  end_ts = end_ts if end_ts else bounds['end_ts'][0]

  tp.query("""
    DROP VIEW IF EXISTS modded_names
  """)

  tp.query("""
    CREATE VIEW modded_names AS
    SELECT
      slice.id,
      slice.depth,
      slice.stack_id,
      CASE
        WHEN slice.name LIKE 'Choreographer#doFrame%'
          THEN 'Choreographer#doFrame'
        WHEN slice.name LIKE 'DrawFrames%'
          THEN 'DrawFrames'
        WHEN slice.name LIKE '/data/app%.apk'
          THEN 'APK load'
        WHEN slice.name LIKE 'OpenDexFilesFromOat%'
          THEN 'OpenDexFilesFromOat'
        WHEN slice.name LIKE 'Open oat file%'
          THEN 'Open oat file'
        ELSE slice.name
      END AS modded_name
    FROM slice
  """)

  tp.query("""
    DROP VIEW IF EXISTS thread_slice_stack
  """)

  tp.query("""
    CREATE VIEW thread_slice_stack AS
    SELECT
      efs.ts,
      efs.dur,
      IFNULL(n.stack_id, -1) AS stack_id,
      t.utid,
      IIF(efs.source_id IS NULL, '[No slice]', IFNULL(
        (
          SELECT GROUP_CONCAT(modded_name, ' > ')
          FROM (
            SELECT p.modded_name
            FROM ancestor_slice(efs.source_id) a
            JOIN modded_names p ON a.id = p.id
            ORDER BY p.depth
          )
        ) || ' > ' || n.modded_name,
        n.modded_name)) AS stack_name
    FROM experimental_flat_slice({}, {}) efs
    LEFT JOIN modded_names n ON efs.source_id = n.id
    JOIN thread_track t ON t.id = efs.track_id
  """.format(start_ts, end_ts))

  tp.query("""
    DROP TABLE IF EXISTS thread_slice_stack_with_state
  """)

  tp.query("""
    CREATE VIRTUAL TABLE thread_slice_stack_with_state
    USING SPAN_JOIN(
      thread_slice_stack PARTITIONED utid,
      thread_state PARTITIONED utid
    )
  """)

  if process_name:
    where_process = "AND process.name = '{}'".format(process_name)
  else:
    where_process = ''

  breakdown = tp.query("""
    SELECT
      process.name AS process_name,
      thread.name AS thread_name,
      CASE
        WHEN slice.state = 'D' and slice.io_wait
          THEN 'Uninterruptible sleep (IO)'
        WHEN slice.state = 'DK' and slice.io_wait
          THEN 'Uninterruptible sleep + Wake-kill (IO)'
        WHEN slice.state = 'D' and not slice.io_wait
          THEN 'Uninterruptible sleep (non-IO)'
        WHEN slice.state = 'DK' and not slice.io_wait
          THEN 'Uninterruptible sleep + Wake-kill (non-IO)'
        WHEN slice.state = 'D' THEN 'Uninterruptible sleep'
        WHEN slice.state = 'DK' THEN 'Uninterruptible sleep + Wake-kill'
        WHEN slice.state = 'S' THEN 'Interruptible sleep'
        WHEN slice.state = 'R' THEN 'Runnable'
        WHEN slice.state = 'R+' THEN 'Runnable (Preempted)'
        ELSE slice.state
      END AS state,
      slice.stack_name,
      SUM(slice.dur)/1e6 AS dur_sum,
      MIN(slice.dur/1e6) AS dur_min,
      MAX(slice.dur/1e6) AS dur_max,
      AVG(slice.dur/1e6) AS dur_mean,
      PERCENTILE(slice.dur/1e6, 50) AS dur_median,
      PERCENTILE(slice.dur/1e6, 25) AS dur_25_percentile,
      PERCENTILE(slice.dur/1e6, 75) AS dur_75_percentile,
      PERCENTILE(slice.dur/1e6, 95) AS dur_95_percentile,
      PERCENTILE(slice.dur/1e6, 99) AS dur_99_percentile,
      COUNT(1) AS count
    FROM process
    JOIN thread USING (upid)
    JOIN thread_slice_stack_with_state slice USING (utid)
    WHERE dur != -1 {}
    GROUP BY thread.name, stack_id, state
    ORDER BY dur_sum DESC
  """.format(where_process)).as_pandas_dataframe()

  return breakdown
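# A hedged usage sketch, not part of the original module: 'trace.pftrace' and
# 'com.example.app' are illustrative placeholders. With no explicit bounds,
# compute_breakdown falls back to the full range in trace_bounds.
def _example_full_trace_breakdown():
  tp = TraceProcessor(trace='trace.pftrace')  # hypothetical trace path
  try:
    return compute_breakdown(tp, process_name='com.example.app')
  finally:
    tp.close()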