Example #1
  def test_invalid_batch(self):
    batch = PROTO_FACTORY.CellsBatch()

    # Since the batch isn't marked as the last batch, the QueryResultIterator
    # expects another batch and raises IndexError because no next batch exists.
    with self.assertRaises(IndexError):
      qr_iterator = TraceProcessor.QueryResultIterator([], [batch])
Example #2
  def test_null_cells_as_pandas(self):
    int_values = [100, 200, 300, 500, 600]
    str_values = ['bar1', 'bar2', 'bar3']

    batch = PROTO_FACTORY.CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch.varint_cells.extend(int_values)
    batch.string_cells = "\0".join(str_values) + "\0"
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(
        ['foo_id', 'foo_num', 'foo_num_2'], [batch])
    qr_df = qr_iterator.as_pandas_dataframe()

    # Any cell (and thus any column in a row) can be set to null. In this
    # query result, foo_num_2 of the second row was set to null. Check that
    # all the rows are still returned correctly.
    int_values_check = [100, 200, 300, None, 500, 600]
    for num, row in qr_df.iterrows():
      self.assertEqual(row['foo_id'], str_values[num])
      self.assertEqual(row['foo_num'], int_values_check[num * 2])
      self.assertEqual(row['foo_num_2'], int_values_check[num * 2 + 1])
Example #3
  def test_one_batch_as_pandas(self):
    int_values = [100, 200]
    str_values = ['bar1', 'bar2']

    batch = PROTO_FACTORY.CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
    ])
    batch.varint_cells.extend(int_values)
    batch.string_cells = "\0".join(str_values) + "\0"
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(
        ['foo_id', 'foo_num', 'foo_null'], [batch])

    qr_df = qr_iterator.as_pandas_dataframe()
    for num, row in qr_df.iterrows():
      self.assertEqual(row['foo_id'], str_values[num])
      self.assertEqual(row['foo_num'], int_values[num])
      self.assertIsNone(row['foo_null'])
Example #4
  def test_empty_batch(self):
    batch = PROTO_FACTORY.CellsBatch()
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator([], [batch])

    # The batch is empty, so iterating should yield no rows at all.
    rows = list(qr_iterator)
    self.assertEqual(len(rows), 0)
Example #5
  def test_empty_batch_as_pandas(self):
    batch = PROTO_FACTORY.CellsBatch()
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator([], [batch])

    qr_df = qr_iterator.as_pandas_dataframe()

    # The batch is empty, so the resulting dataframe should have no rows.
    self.assertTrue(qr_df.empty)
Example #6
  def _create_tp(
      self, trace: ResolverRegistry.Result
  ) -> Optional[Tuple[TraceProcessor, Metadata]]:
    try:
      return TraceProcessor(trace=trace.generator,
                            config=self.config.tp_config), trace.metadata
    except TraceProcessorException as ex:
      if self.config.load_failure_handling == FailureHandling.RAISE_EXCEPTION:
        raise ex
      self._stats.load_failures += 1
      return None
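
For illustration, a caller can rely on the Optional return to keep only the traces that loaded; a minimal sketch, assuming a hypothetical load_traces helper on the same class:

  def load_traces(self, traces):
    # Hypothetical helper (not part of the original): failed loads return
    # None and were already counted in self._stats.load_failures, so
    # filter them out here.
    results = (self._create_tp(t) for t in traces)
    return [res for res in results if res is not None]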
Example #7
  def test_incorrect_columns_batch(self):
    batch = PROTO_FACTORY.CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_VARINT, TestQueryResultIterator.CELL_VARINT
    ])
    batch.varint_cells.extend([100, 200])
    batch.is_last_batch = True

    # The number of cells should always be a multiple of the number of
    # columns. Here it clearly is not, so a TraceProcessorException is raised
    # during the data integrity check in the constructor.
    with self.assertRaises(TraceProcessorException):
      qr_iterator = TraceProcessor.QueryResultIterator(
          ['foo_id', 'foo_num', 'foo_dur', 'foo_ms'], [batch])
Example #8
def compute_breakdown_for_startup(tp: TraceProcessor,
                                  package_name=None,
                                  process_name=None):
  """Computes the slice breakdown (like |compute_breakdown|) but only
  considering slices which happened during an app startup.

  Args:
    tp: the trace processor instance to query.
    package_name: optional package name to filter for startups. Only a single
        startup matching this package name should be present. If not specified,
        only a single startup of any app should be in the trace.
    process_name: optional process name to filter for slices; specifying
        this argument can make computing the breakdown a lot faster.

  Returns:
    The same as |compute_breakdown| but only containing slices which happened
    during app startup.
  """
  tp.metric(['android_startup'])

  # Verify there was only one startup in the trace matching the package
  # name.
  filter = "WHERE package = '{}'".format(package_name) if package_name else ''
  launches = tp.query('''
    SELECT ts, ts_end, dur
    FROM launches
    {}
  '''.format(filter)).as_pandas_dataframe()
  if len(launches) == 0:
    raise TraceProcessorException("Didn't find startup in trace")
  if len(launches) > 1:
    raise TraceProcessorException("Found multiple startups in trace")

  start = launches['ts'][0]
  end = launches['ts_end'][0]

  return compute_breakdown(tp, start, end, process_name)
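
A minimal usage sketch (hypothetical; the trace path and package name are placeholders):

from perfetto.trace_processor import TraceProcessor

tp = TraceProcessor(trace='startup_trace.pftrace')  # placeholder path
breakdown = compute_breakdown_for_startup(tp, package_name='com.example.app')
print(breakdown.head(10))
tp.close()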
Example #9
  def test_invalid_cell_type_as_pandas(self):
    batch = PROTO_FACTORY.CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_INVALID,
        TestQueryResultIterator.CELL_VARINT
    ])
    batch.varint_cells.extend([100, 200])
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch])

    # The first cell is declared with type CELL_INVALID, which is not a
    # valid cell type, so converting to a dataframe should raise a
    # TraceProcessorException.
    with self.assertRaises(TraceProcessorException):
      _ = qr_iterator.as_pandas_dataframe()
Example #10
  def test_incorrect_cells_batch_as_pandas(self):
    str_values = ['bar1', 'bar2']

    batch = PROTO_FACTORY.CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch.string_cells = "\0".join(str_values) + "\0"
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch])

    # The batch specifies there ought to be 2 cells of type STRING and 2 cells
    # of type VARINT, but no varint cells are defined in the batch. Thus an
    # IndexError occurs when it tries to access the empty varint cells list.
    with self.assertRaises(IndexError):
      _ = qr_iterator.as_pandas_dataframe()
Example #11
  def test_many_batches(self):
    int_values = [100, 200, 300, 400]
    str_values = ['bar1', 'bar2', 'bar3', 'bar4']

    batch_1 = PROTO_FACTORY.CellsBatch()
    batch_1.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
    ])
    batch_1.varint_cells.extend(int_values[:2])
    batch_1.string_cells = "\0".join(str_values[:2]) + "\0"
    batch_1.is_last_batch = False

    batch_2 = PROTO_FACTORY.CellsBatch()
    batch_2.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_NULL,
    ])
    batch_2.varint_cells.extend(int_values[2:])
    batch_2.string_cells = "\0".join(str_values[2:]) + "\0"
    batch_2.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(
        ['foo_id', 'foo_num', 'foo_null'], [batch_1, batch_2])

    for num, row in enumerate(qr_iterator):
      self.assertEqual(row.foo_id, str_values[num])
      self.assertEqual(row.foo_num, int_values[num])
      self.assertIsNone(row.foo_null)
Example #12
def create_tp(trace: TraceReference):
  return TraceProcessor(
      trace=trace,
      config=TraceProcessorConfig(bin_path=os.environ["SHELL_PATH"]))
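
A sketch of how this helper might be used (hypothetical; assumes SHELL_PATH points at a trace_processor_shell binary and the trace path is a placeholder):

tp = create_tp('example_trace.pftrace')  # placeholder path
for row in tp.query('SELECT name FROM slice LIMIT 5'):
  print(row.name)
tp.close()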
Example #13
def compute_breakdown(tp: TraceProcessor,
                      start_ts=None,
                      end_ts=None,
                      process_name=None):
  """For each userspace slice in the trace processor instance |tp|, computes
  the self-time of that slice grouping by process name, thread name
  and thread state.

  Args:
    tp: the trace processor instance to query.
    start_ts: optional bound to only consider slices after this ts.
    end_ts: optional bound to only consider slices until this ts.
    process_name: optional process name to filter for slices; specifying
        this argument can make computing the breakdown a lot faster.

  Returns:
    A Pandas dataframe containing the total self time taken by a slice stack
    broken down by process name, thread name and thread state.
  """
  bounds = tp.query('SELECT * FROM trace_bounds').as_pandas_dataframe()
  start_ts = start_ts if start_ts else bounds['start_ts'][0]
  end_ts = end_ts if end_ts else bounds['end_ts'][0]

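  # Canonicalise slice names so that per-instance variants (e.g.
  # 'Choreographer#doFrame 1234') are grouped under a single name.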
  tp.query("""
    DROP VIEW IF EXISTS modded_names
  """)

  tp.query("""
    CREATE VIEW modded_names AS
    SELECT
      slice.id,
      slice.depth,
      slice.stack_id,
      CASE
        WHEN slice.name LIKE 'Choreographer#doFrame%'
          THEN 'Choreographer#doFrame'
        WHEN slice.name LIKE 'DrawFrames%'
          THEN 'DrawFrames'
        WHEN slice.name LIKE '/data/app%.apk'
          THEN 'APK load'
        WHEN slice.name LIKE 'OpenDexFilesFromOat%'
          THEN 'OpenDexFilesFromOat'
        WHEN slice.name LIKE 'Open oat file%'
          THEN 'Open oat file'
        ELSE slice.name
      END AS modded_name
    FROM slice
  """)

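  # Flatten the slice tree within [start_ts, end_ts] and build, for each
  # flattened slice on a thread track, its full ' > '-joined stack name.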
  tp.query("""
    DROP VIEW IF EXISTS thread_slice_stack
  """)

  tp.query("""
    CREATE VIEW thread_slice_stack AS
    SELECT
      efs.ts,
      efs.dur,
      IFNULL(n.stack_id, -1) AS stack_id,
      t.utid,
      IIF(efs.source_id IS NULL, '[No slice]', IFNULL(
        (
          SELECT GROUP_CONCAT(modded_name, ' > ')
          FROM (
            SELECT p.modded_name
            FROM ancestor_slice(efs.source_id) a
            JOIN modded_names p ON a.id = p.id
            ORDER BY p.depth
          )
        ) || ' > ' || n.modded_name,
        n.modded_name
      )) AS stack_name
    FROM experimental_flat_slice({}, {}) efs
    LEFT JOIN modded_names n ON efs.source_id = n.id
    JOIN thread_track t ON t.id = efs.track_id
  """.format(start_ts, end_ts))

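  # Span-join the flattened slice stacks with thread_state so that each
  # span also carries the scheduling state of its thread.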
  tp.query("""
    DROP TABLE IF EXISTS thread_slice_stack_with_state
  """)

  tp.query("""
    CREATE VIRTUAL TABLE thread_slice_stack_with_state
    USING SPAN_JOIN(
      thread_slice_stack PARTITIONED utid,
      thread_state PARTITIONED utid
    )
  """)

  if process_name:
    where_process = "AND process.name = '{}'".format(process_name)
  else:
    where_process = ''

  breakdown = tp.query("""
    SELECT
      process.name AS process_name,
      thread.name AS thread_name,
      CASE
        WHEN slice.state = 'D' and slice.io_wait
          THEN 'Uninterruptible sleep (IO)'
        WHEN slice.state = 'DK' and slice.io_wait
          THEN 'Uninterruptible sleep + Wake-kill (IO)'
        WHEN slice.state = 'D' and not slice.io_wait
          THEN 'Uninterruptible sleep (non-IO)'
        WHEN slice.state = 'DK' and not slice.io_wait
          THEN 'Uninterruptible sleep + Wake-kill (non-IO)'
        WHEN slice.state = 'D'
          THEN 'Uninterruptible sleep'
        WHEN slice.state = 'DK'
          THEN 'Uninterruptible sleep + Wake-kill'
        WHEN slice.state = 'S' THEN 'Interruptible sleep'
        WHEN slice.state = 'R' THEN 'Runnable'
        WHEN slice.state = 'R+' THEN 'Runnable (Preempted)'
        ELSE slice.state
      END AS state,
      slice.stack_name,
      SUM(slice.dur)/1e6 AS dur_sum,
      MIN(slice.dur/1e6) AS dur_min,
      MAX(slice.dur/1e6) AS dur_max,
      AVG(slice.dur/1e6) AS dur_mean,
      PERCENTILE(slice.dur/1e6, 50) AS dur_median,
      PERCENTILE(slice.dur/1e6, 25) AS dur_25_percentile,
      PERCENTILE(slice.dur/1e6, 75) AS dur_75_percentile,
      PERCENTILE(slice.dur/1e6, 95) AS dur_95_percentile,
      PERCENTILE(slice.dur/1e6, 99) AS dur_99_percentile,
      COUNT(1) as count
    FROM process
    JOIN thread USING (upid)
    JOIN thread_slice_stack_with_state slice USING (utid)
    WHERE dur != -1 {}
    GROUP BY thread.name, stack_id, state
    ORDER BY dur_sum DESC
  """.format(where_process)).as_pandas_dataframe()

  return breakdown
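
A minimal end-to-end sketch (hypothetical; the trace path is a placeholder):

from perfetto.trace_processor import TraceProcessor

tp = TraceProcessor(trace='example_trace.pftrace')  # placeholder path
breakdown = compute_breakdown(tp)  # defaults to the whole trace bounds
print(breakdown.head(10))
tp.close()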