Example #1
  def test_chromeos_platform(self):
    """Test ChromeOS platform is written in stats."""
    self.client.insert.return_value = {'insertErrors': [{'index': 1}]}
    context = self._create_context('job_chromeos', 'linux')
    fuzz_task.write_crashes_to_big_query(self.group, context)

    success_count = monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({
        'success': True
    })
    failure_count = monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({
        'success': False
    })

    self.assertEqual(2, success_count)
    self.assertEqual(1, failure_count)

    self.mock.Client.assert_called_once_with(
        dataset_id='main', table_id='crashes$19700101')
    self.client.insert.assert_called_once_with([
        big_query.Insert(
            self._json('job_chromeos', 'chrome', 'c1', True, 't'),
            'key:bot:99:0'),
        big_query.Insert(
            self._json('job_chromeos', 'chrome', 'c2', False, None),
            'key:bot:99:1'),
        big_query.Insert(
            self._json('job_chromeos', 'chrome', 'c3', False, None),
            'key:bot:99:2')
    ])
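The mocked response marks row index 1 as failed, so two of the three inserted rows count as successes. A minimal sketch of the counting this test exercises (the helper name is hypothetical; it assumes BIG_QUERY_WRITE_COUNT is a labeled counter exposing an increment_by method):

def _record_write_counts(num_rows, response):
  # Each 'insertErrors' entry identifies one failed row by index.
  failed_count = len(response.get('insertErrors', []))
  monitoring_metrics.BIG_QUERY_WRITE_COUNT.increment_by(
      num_rows - failed_count, {'success': True})
  monitoring_metrics.BIG_QUERY_WRITE_COUNT.increment_by(
      failed_count, {'success': False})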
Example #2
  def test_all_succeed(self):
    """Test writing succeeds."""
    self.client.insert.return_value = {}
    context = self._create_context('job', 'linux')
    fuzz_task.write_crashes_to_big_query(self.group, context)

    success_count = monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({
        'success': True
    })
    failure_count = monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({
        'success': False
    })

    self.assertEqual(3, success_count)
    self.assertEqual(0, failure_count)

    self.mock.Client.assert_called_once_with(
        dataset_id='main', table_id='crashes$19700101')
    self.client.insert.assert_called_once_with([
        big_query.Insert(
            self._json('job', 'linux', 'c1', True, 't'), 'key:bot:99:0'),
        big_query.Insert(
            self._json('job', 'linux', 'c2', False, None), 'key:bot:99:1'),
        big_query.Insert(
            self._json('job', 'linux', 'c3', False, None), 'key:bot:99:2')
    ])
Example #3
def _record_cross_pollination_stats(stats):
    """Log stats about cross pollination in BigQuery."""
    # If no stats were gathered due to a timeout or lack of corpus, return.
    if not stats:
        return
    # BigQuery is not available in local development. This is necessary because
    # the untrusted runner is in a separate process and can't be easily mocked.
    if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
            'PY_UNITTESTS'):
        return

    bigquery_row = {
        'project_qualified_name': stats.project_qualified_name,
        'method': stats.method,
        'sources': stats.sources,
        'tags': stats.tag if stats.tag else '',
        'initial_corpus_size': stats.initial_corpus_size,
        'corpus_size': stats.minimized_corpus_size_units,
        'initial_edge_coverage': stats.initial_edge_coverage,
        'edge_coverage': stats.edge_coverage,
        'initial_feature_coverage': stats.initial_feature_coverage,
        'feature_coverage': stats.feature_coverage
    }

    client = big_query.Client(dataset_id='main',
                              table_id='cross_pollination_statistics')
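    # insert_id=None opts out of BigQuery's best-effort row de-duplication.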
    client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
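Every example hands Client.insert a list of big_query.Insert(row, insert_id) pairs. A minimal definition consistent with that usage would be (hypothetical; the real type may carry extra behavior):

import collections

# A BigQuery row plus the insertId used for best-effort de-duplication;
# insert_id=None disables de-duplication for that row.
Insert = collections.namedtuple('Insert', ['row', 'insert_id'])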
Example #4
def record_cross_pollination_stats(pruner_stats, pollinator_stats,
                                   project_qualified_name, sources, tag,
                                   initial_corpus_size,
                                   minimized_corpus_size_units, method):
    """Log stats about cross pollination in BigQuery."""
    # TODO(mpherman): Find a way to collect these stats for OSS Fuzz.
    if environment.is_untrusted_worker():
        return
    # BigQuery is not available in local development. This is necessary because
    # the untrusted runner is in a separate process and can't be easily mocked.
    if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
            'PY_UNITTESTS'):
        return

    if not pruner_stats or not pollinator_stats:
        return

    bigquery_row = {
        'project_qualified_name': project_qualified_name,
        'method': method,
        'sources': sources,
        'tags': tag if tag else '',
        'initial_corpus_size': initial_corpus_size,
        'corpus_size': minimized_corpus_size_units,
        'initial_edge_coverage': pruner_stats['edge_coverage'],
        'edge_coverage': pollinator_stats['edge_coverage'],
        'initial_feature_coverage': pruner_stats['feature_coverage'],
        'feature_coverage': pollinator_stats['feature_coverage']
    }

    client = big_query.Client(dataset_id='main',
                              table_id='cross_pollination_statistics')
    client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
Example #5
def _store_probabilities_in_bigquery(data):
    """Update a bigquery table containing the daily updated
  probability distribution over strategies."""
    bigquery_data = []

    # TODO(mukundv): Update once we choose a temperature parameter for final
    # implementation.
    for row in data:
        bigquery_row = {
            'strategy_name': row['strategy'],
            'probability_high_temperature':
                row['bandit_weight_high_temperature'],
            'probability_low_temperature':
                row['bandit_weight_low_temperature'],
            'probability_medium_temperature':
                row['bandit_weight_medium_temperature'],
            'run_count': row['run_count']
        }
        bigquery_data.append(big_query.Insert(row=bigquery_row,
                                              insert_id=None))

    client = big_query.Client(dataset_id='main',
                              table_id='fuzz_strategy_experiments')
    client.insert(bigquery_data)
Example #6
    def test_error(self):
        """Tests error."""
        self.client.insert.return_value = {'insertErrors': ['exception']}
        big_query.write_range('regressions', self.testcase, 'regression', 456,
                              789)

        self.mock.log_error.assert_called_once_with(
            ("Ignoring error writing the testcase's regression range (%s) to "
             'BigQuery.' % self.testcase.key.id()),
            exception=mock.ANY)
        self.client.insert.assert_called_once_with([
            big_query.Insert(row={
                'testcase_id': str(self.testcase.key.id()),
                'crash_type': 'type',
                'crash_state': 'state',
                'security_flag': True,
                'parent_fuzzer_name': 'libfuzzer',
                'fuzzer_name': 'libfuzzer_pdf',
                'job_type': 'some_job',
                'created_at': 99,
                'regression_range_start': 456,
                'regression_range_end': 789,
            },
                             insert_id='%s:456:789' % self.testcase.key.id())
        ])
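This error path, together with test_write in Example #11 below, pins down write_range's observable behavior: one Insert row keyed by the testcase id and range bounds, with any insertErrors logged and otherwise ignored. A hypothetical reconstruction under those constraints, sketched as if inside the big_query module (so Client, Insert, and logs are in scope); the exact testcase attribute names, e.g. overridden_fuzzer_name, are assumptions:

import time

def write_range(table_id, testcase, range_name, start, end):
  """Write a testcase's named range (e.g. regression) to BigQuery."""
  client = Client(dataset_id='main', table_id=table_id)
  result = client.insert([
      Insert(
          row={
              'testcase_id': str(testcase.key.id()),
              'crash_type': testcase.crash_type,
              'crash_state': testcase.crash_state,
              'security_flag': testcase.security_flag,
              'parent_fuzzer_name': testcase.fuzzer_name,
              'fuzzer_name': testcase.overridden_fuzzer_name,
              'job_type': testcase.job_type,
              'created_at': int(time.time()),
              '%s_range_start' % range_name: start,
              '%s_range_end' % range_name: end,
          },
          insert_id='%s:%s:%s' % (testcase.key.id(), start, end))
  ])

  # Errors are logged and swallowed; a failed stats write should never fail
  # the task that triggered it.
  for error in result.get('insertErrors', []):
    logs.log_error(
        "Ignoring error writing the testcase's %s range (%s) to BigQuery." %
        (range_name, testcase.key.id()),
        exception=Exception(error))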
Example #7
  def test_insert(self):
    """Test calling insertAll API."""
    underlying = mock.MagicMock()
    tabledata = mock.MagicMock()
    insert_all = mock.MagicMock()

    underlying.tabledata.return_value = tabledata
    tabledata.insertAll.return_value = insert_all
    insert_all.execute.return_value = {'test': 1}
    self.mock.get_api_client.return_value = underlying

    client = big_query.Client(dataset_id='data', table_id='table')

    self.assertDictEqual(
        {'test': 1},
        client.insert([
            big_query.Insert({'a': 1}, 'prefix:0'),
            big_query.Insert({'b': 2}, 'prefix:1')
        ]))
    tabledata.insertAll.assert_called_once_with(
        projectId='project',
        datasetId='data',
        tableId='table',
        body={
            'kind': 'bigquery#tableDataInsertAllRequest',
            'rows': [
                {'json': {'a': 1}, 'insertId': 'prefix:0'},
                {'json': {'b': 2}, 'insertId': 'prefix:1'},
            ]
        })
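The assertion above fixes the full insertAll request, so the wrapper under test plausibly reduces to the following. This is a sketch written as if inside the big_query module; get_api_client and the project-id lookup are inferred from the mocks, not confirmed by the source:

class Client(object):
  """Thin wrapper around the BigQuery tabledata().insertAll() API."""

  def __init__(self, dataset_id, table_id=None):
    # Assumed helper; whatever it is, the test mocks it to return 'project'.
    self.project_id = utils.get_application_id()
    self.dataset_id = dataset_id
    self.table_id = table_id
    self.client = get_api_client()

  def insert(self, inserts):
    """Insert rows, one 'rows' entry per big_query.Insert."""
    body = {
        'kind': 'bigquery#tableDataInsertAllRequest',
        'rows': [{'json': insert.row, 'insertId': insert.insert_id}
                 for insert in inserts],
    }
    return self.client.tabledata().insertAll(
        projectId=self.project_id,
        datasetId=self.dataset_id,
        tableId=self.table_id,
        body=body).execute()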
Example #8
def store_current_weights_in_bigquery():
    """Update a bigquery table containing the daily stats."""
    rows = []
    target_jobs = ndb_utils.get_all_from_model(data_types.FuzzTargetJob)
    for target_job in target_jobs:
        row = {
            'fuzzer': target_job.fuzz_target_name,
            'job': target_job.job,
            'weight': target_job.weight
        }
        rows.append(big_query.Insert(row=row, insert_id=None))

    client = big_query.Client(dataset_id='main', table_id='fuzzer_weights')
    client.insert(rows)
Example #9
def _store_probabilities_in_bigquery(data):
  """Update a bigquery table containing the daily updated
  probability distribution over strategies."""
  bigquery_data = []

  for row in data:
    bigquery_row = {
        'strategy_name': row['strategy'],
        'probability': row['bandit_weight'],
        'run_count': row['run_count']
    }
    bigquery_data.append(big_query.Insert(row=bigquery_row, insert_id=None))

  client = big_query.Client(
      dataset_id='main', table_id='fuzz_strategy_probability')
  client.insert(bigquery_data)
Example #10
def _store_probabilities_in_bigquery(engine, data):
  """Update a bigquery table containing the daily updated
  probability distribution over strategies."""
  bigquery_data = []

  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  for row in data:
    bigquery_row = {
        'strategy_name': row['strategy'],
        'probability': row['bandit_weight'],
        'engine': engine.name
    }
    bigquery_data.append(big_query.Insert(row=bigquery_row, insert_id=None))

  if bigquery_data:
    client = big_query.Client(
        dataset_id='main', table_id='fuzz_strategy_probability')
    client.insert(bigquery_data)
  else:
    logs.log('No fuzz strategy distribution data was found to upload to '
             'BigQuery.')
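For illustration, a hypothetical call that satisfies the fields read above; the real data comes from the bandit-weight computation upstream:

class FakeEngine(object):
  """Stand-in for a fuzzing engine object exposing a .name attribute."""
  name = 'libFuzzer'

sample_data = [
    {'strategy': 'value_profile', 'bandit_weight': 0.4},
    {'strategy': 'fork', 'bandit_weight': 0.6},
]
_store_probabilities_in_bigquery(FakeEngine(), sample_data)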
Example #11
    def test_write(self):
        """Tests write."""
        self.client.insert.return_value = {}
        big_query.write_range('regressions', self.testcase, 'regression', 456,
                              789)

        self.assertEqual(0, self.mock.log_error.call_count)
        self.client.insert.assert_called_once_with([
            big_query.Insert(row={
                'testcase_id': str(self.testcase.key.id()),
                'crash_type': 'type',
                'crash_state': 'state',
                'security_flag': True,
                'parent_fuzzer_name': 'libfuzzer',
                'fuzzer_name': 'libfuzzer_pdf',
                'job_type': 'some_job',
                'created_at': 99,
                'regression_range_start': 456,
                'regression_range_end': 789,
            },
                             insert_id='%s:456:789' % self.testcase.key.id())
        ])
Example #12
  def test_create_many_groups(self, project_name):
    """Test creating many groups."""
    self.mock.get_project_name.return_value = project_name

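    # Simulate BigQuery reporting an insert error for the row at index 0.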
    self.mock.insert.return_value = {'insertErrors': [{'index': 0}]}
    invalid_crash = self._make_crash('e1', state='error1')
    invalid_crash.error = 'error'

    # TODO(metzman): Add a separate test for strategies.
    r2_stacktrace = ('r2\ncf::fuzzing_strategies: value_profile\n')

    crashes = [
        self._make_crash('r1', state='reproducible1'),
        self._make_crash(r2_stacktrace, state='reproducible1'),
        self._make_crash('r3', state='reproducible1'),
        self._make_crash('r4', state='reproducible2'),
        self._make_crash('u1', state='unreproducible1'),
        self._make_crash('u2', state='unreproducible2'),
        self._make_crash('u3', state='unreproducible2'),
        self._make_crash('u4', state='unreproducible3'), invalid_crash
    ]

    self.mock.test_for_reproducibility.side_effect = [
        False,  # For r1. It returns False. So, r1 is demoted.
        True,  # For r2. It returns True. So, r2 becomes primary for its group.
        True,  # For r4.
        False,  # For u1.
        False,  # For u2.
        False,  # For u3.
        False,  # For u4.
    ]

    new_crash_count, known_crash_count, groups = fuzz_task.process_crashes(
        crashes=crashes,
        context=fuzz_task.Context(
            project_name=project_name,
            bot_name='bot',
            job_type='job',
            fuzz_target=data_types.FuzzTarget(engine='engine', binary='binary'),
            redzone=111,
            platform_id='platform',
            crash_revision=1234,
            fuzzer_name='fuzzer',
            window_argument='win_args',
            fuzzer_metadata={},
            testcases_metadata={},
            timeout_multiplier=1,
            test_timeout=2,
            thread_wait_timeout=3,
            data_directory='/data'))
    self.assertEqual(5, new_crash_count)
    self.assertEqual(3, known_crash_count)

    self.assertEqual(5, len(groups))
    self.assertEqual([
        'reproducible1', 'reproducible2', 'unreproducible1', 'unreproducible2',
        'unreproducible3'
    ], [group.main_crash.crash_state for group in groups])
    self.assertEqual([True, True, True, True, True],
                     [group.is_new() for group in groups])
    self.assertEqual([3, 1, 1, 2, 1], [len(group.crashes) for group in groups])

    testcases = list(data_types.Testcase.query())
    self.assertEqual(5, len(testcases))
    self.assertSetEqual(
        set([r2_stacktrace, 'r4', 'u1', 'u2', 'u4']),
        set(t.crash_stacktrace for t in testcases))

    self.assertSetEqual(
        set([
            '{"fuzzing_strategies": ["value_profile"]}', None, None, None, None
        ]), set(t.additional_metadata for t in testcases))

    # There's one invalid_crash. And r2 is a reproducible crash, so r3 doesn't
    # invoke archive_testcase_and_dependencies_in_gcs. Therefore,
    # archive_testcase_and_dependencies_in_gcs is called `len(crashes) - 2`
    # times.
    self.assertEqual(
        len(crashes) - 2,
        self.mock.archive_testcase_and_dependencies_in_gcs.call_count)

    # Check only the desired testcases were saved.
    actual_crash_infos = [group.main_crash.crash_info for group in groups]
    if project_name != 'chromium':
      expected_crash_infos = [None] * len(actual_crash_infos)
    else:
      expected_saved_crash_info = crash_uploader.CrashReportInfo(
          product='Chrome_' + environment.platform().lower().capitalize(),
          version='this.is.fake.ver',
          serialized_crash_stack_frames='f00df00d')
      expected_crash_infos = [
          expected_saved_crash_info,  # r2 is main crash for group r1,r2,r3
          expected_saved_crash_info,  # r4 is main crash for its own group
          None,  # u1 is not reproducible
          None,  # u2, u3 are not reproducible
          None,  # u4 is not reproducible
      ]

    self.assertEqual(len(expected_crash_infos), len(actual_crash_infos))
    for expected, actual in zip(expected_crash_infos, actual_crash_infos):
      if not expected:
        self.assertIsNone(actual)
        continue

      self.assertEqual(expected.product, actual.product)
      self.assertEqual(expected.version, actual.version)
      self.assertEqual(expected.serialized_crash_stack_frames,
                       actual.serialized_crash_stack_frames)

    def _make_big_query_json(crash, reproducible_flag, new_flag, testcase_id):
      return {
          'crash_type': crash.crash_type,
          'crash_state': crash.crash_state,
          'created_at': 987,
          'platform': 'platform',
          'crash_time_in_ms': int(crash.crash_time * 1000),
          'parent_fuzzer_name': 'engine',
          'fuzzer_name': 'engine_binary',
          'job_type': 'job',
          'security_flag': crash.security_flag,
          'reproducible_flag': reproducible_flag,
          'revision': '1234',
          'new_flag': new_flag,
          'project': project_name,
          'testcase_id': testcase_id
      }

    def _get_testcase_id(crash):
      rows = list(
          data_types.Testcase.query(
              data_types.Testcase.crash_type == crash.crash_type,
              data_types.Testcase.crash_state == crash.crash_state,
              data_types.Testcase.security_flag == crash.security_flag))
      if not rows:
        return None
      return str(rows[0].key.id())

    # Calls to write 5 groups of crashes to BigQuery.
    self.assertEqual(5, self.mock.insert.call_count)
    self.mock.insert.assert_has_calls([
        mock.call(mock.ANY, [
            big_query.Insert(
                _make_big_query_json(crashes[0], True, False, None),
                '%s:bot:987:0' % crashes[0].key),
            big_query.Insert(
                _make_big_query_json(crashes[1], True, True,
                                     _get_testcase_id(crashes[1])),
                '%s:bot:987:1' % crashes[0].key),
            big_query.Insert(
                _make_big_query_json(crashes[2], True, False, None),
                '%s:bot:987:2' % crashes[0].key)
        ]),
        mock.call(mock.ANY, [
            big_query.Insert(
                _make_big_query_json(crashes[3], True, True,
                                     _get_testcase_id(crashes[3])),
                '%s:bot:987:0' % crashes[3].key)
        ]),
        mock.call(mock.ANY, [
            big_query.Insert(
                _make_big_query_json(crashes[4], False, True,
                                     _get_testcase_id(crashes[4])),
                '%s:bot:987:0' % crashes[4].key)
        ]),
        mock.call(mock.ANY, [
            big_query.Insert(
                _make_big_query_json(crashes[5], False, True,
                                     _get_testcase_id(crashes[5])),
                '%s:bot:987:0' % crashes[5].key),
            big_query.Insert(
                _make_big_query_json(crashes[6], False, False, None),
                '%s:bot:987:1' % crashes[5].key)
        ]),
        mock.call(mock.ANY, [
            big_query.Insert(
                _make_big_query_json(crashes[7], False, True,
                                     _get_testcase_id(crashes[7])),
                '%s:bot:987:0' % crashes[7].key)
        ]),
    ])