def test_chromeos_platform(self):
  """Test ChromeOS platform is written in stats."""
  # Report one failed row (index 1) out of the batch of three.
  self.client.insert.return_value = {'insertErrors': [{'index': 1}]}
  fuzz_task.write_crashes_to_big_query(
      self.group, self._create_context('job_chromeos', 'linux'))

  # Two rows succeeded, one failed.
  self.assertEqual(
      2, monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({'success': True}))
  self.assertEqual(
      1, monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({'success': False}))

  self.mock.Client.assert_called_once_with(
      dataset_id='main', table_id='crashes$19700101')

  expected_rows = [
      big_query.Insert(
          self._json('job_chromeos', 'chrome', 'c1', True, 't'),
          'key:bot:99:0'),
      big_query.Insert(
          self._json('job_chromeos', 'chrome', 'c2', False, None),
          'key:bot:99:1'),
      big_query.Insert(
          self._json('job_chromeos', 'chrome', 'c3', False, None),
          'key:bot:99:2')
  ]
  self.client.insert.assert_called_once_with(expected_rows)
def test_all_succeed(self):
  """Test writing succeeds."""
  # Empty response from BigQuery means no insert errors.
  self.client.insert.return_value = {}
  fuzz_task.write_crashes_to_big_query(
      self.group, self._create_context('job', 'linux'))

  # All three rows were written; nothing failed.
  self.assertEqual(
      3, monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({'success': True}))
  self.assertEqual(
      0, monitoring_metrics.BIG_QUERY_WRITE_COUNT.get({'success': False}))

  self.mock.Client.assert_called_once_with(
      dataset_id='main', table_id='crashes$19700101')

  expected_rows = [
      big_query.Insert(
          self._json('job', 'linux', 'c1', True, 't'), 'key:bot:99:0'),
      big_query.Insert(
          self._json('job', 'linux', 'c2', False, None), 'key:bot:99:1'),
      big_query.Insert(
          self._json('job', 'linux', 'c3', False, None), 'key:bot:99:2')
  ]
  self.client.insert.assert_called_once_with(expected_rows)
def _record_cross_pollination_stats(stats): """Log stats about cross pollination in BigQuery.""" # If no stats were gathered due to a timeout or lack of corpus, return. if not stats: return # BigQuery not available in local development.This is necessary because the # untrusted runner is in a separate process and can't be easily mocked. if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value( 'PY_UNITTESTS'): return bigquery_row = { 'project_qualified_name': stats.project_qualified_name, 'method': stats.method, 'sources': stats.sources, 'tags': stats.tag if stats.tag else '', 'initial_corpus_size': stats.initial_corpus_size, 'corpus_size': stats.minimized_corpus_size_units, 'initial_edge_coverage': stats.initial_edge_coverage, 'edge_coverage': stats.edge_coverage, 'initial_feature_coverage': stats.initial_feature_coverate, 'feature_coverage': stats.feature_coverage } client = big_query.Client(dataset_id='main', table_id='cross_pollination_statistics') client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
def record_cross_pollination_stats(pruner_stats, pollinator_stats,
                                   project_qualified_name, sources, tag,
                                   initial_corpus_size,
                                   minimized_corpus_size_units, method):
  """Log stats about cross pollination in BigQuery."""
  # TODO(mpherman): Find a way to collect these stats for OSS Fuzz.
  if environment.is_untrusted_worker():
    return

  # BigQuery is not reachable in local development; the untrusted runner
  # lives in a separate process and can't easily be mocked.
  if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
      'PY_UNITTESTS'):
    return

  # Both ends of the pollination must have produced stats.
  if not pruner_stats or not pollinator_stats:
    return

  row = {
      'project_qualified_name': project_qualified_name,
      'method': method,
      'sources': sources,
      'tags': tag or '',
      'initial_corpus_size': initial_corpus_size,
      'corpus_size': minimized_corpus_size_units,
      'initial_edge_coverage': pruner_stats['edge_coverage'],
      'edge_coverage': pollinator_stats['edge_coverage'],
      'initial_feature_coverage': pruner_stats['feature_coverage'],
      'feature_coverage': pollinator_stats['feature_coverage']
  }

  bq_client = big_query.Client(
      dataset_id='main', table_id='cross_pollination_statistics')
  bq_client.insert([big_query.Insert(row=row, insert_id=None)])
def _store_probabilities_in_bigquery(data):
  """Update a bigquery table containing the daily updated probability
  distribution over strategies."""
  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  inserts = [
      big_query.Insert(
          row={
              'strategy_name': entry['strategy'],
              'probability_high_temperature':
                  entry['bandit_weight_high_temperature'],
              'probability_low_temperature':
                  entry['bandit_weight_low_temperature'],
              'probability_medium_temperature':
                  entry['bandit_weight_medium_temperature'],
              'run_count': entry['run_count']
          },
          insert_id=None) for entry in data
  ]

  bq_client = big_query.Client(
      dataset_id='main', table_id='fuzz_strategy_experiments')
  bq_client.insert(inserts)
def test_error(self):
  """Tests error."""
  # Simulate BigQuery reporting an insert error for the row.
  self.client.insert.return_value = {'insertErrors': ['exception']}

  big_query.write_range('regressions', self.testcase, 'regression', 456, 789)

  # The error is logged and otherwise ignored.
  expected_message = (
      "Ignoring error writing the testcase's regression range (%s) to "
      'BigQuery.' % self.testcase.key.id())
  self.mock.log_error.assert_called_once_with(
      expected_message, exception=mock.ANY)

  expected_row = {
      'testcase_id': str(self.testcase.key.id()),
      'crash_type': 'type',
      'crash_state': 'state',
      'security_flag': True,
      'parent_fuzzer_name': 'libfuzzer',
      'fuzzer_name': 'libfuzzer_pdf',
      'job_type': 'some_job',
      'created_at': 99,
      'regression_range_start': 456,
      'regression_range_end': 789,
  }
  self.client.insert.assert_called_once_with([
      big_query.Insert(
          row=expected_row, insert_id='%s:456:789' % self.testcase.key.id())
  ])
def test_insert(self):
  """Test calling insertAll API."""
  # Wire up the api_client.tabledata().insertAll().execute() call chain.
  underlying = mock.MagicMock()
  tabledata = mock.MagicMock()
  insert_request = mock.MagicMock()
  underlying.tabledata.return_value = tabledata
  tabledata.insertAll.return_value = insert_request
  insert_request.execute.return_value = {'test': 1}
  self.mock.get_api_client.return_value = underlying

  client = big_query.Client(dataset_id='data', table_id='table')
  result = client.insert([
      big_query.Insert({
          'a': 1
      }, 'prefix:0'),
      big_query.Insert({
          'b': 2
      }, 'prefix:1')
  ])

  # The API response is passed straight back to the caller.
  self.assertDictEqual({'test': 1}, result)
  tabledata.insertAll.assert_called_once_with(
      projectId='project',
      datasetId='data',
      tableId='table',
      body={
          'kind': 'bigquery#tableDataInsertAllRequest',
          'rows': [{
              'json': {
                  'a': 1
              },
              'insertId': 'prefix:0'
          }, {
              'json': {
                  'b': 2
              },
              'insertId': 'prefix:1'
          }]
      })
def store_current_weights_in_bigquery():
  """Update a bigquery table containing the daily stats."""
  # One row per FuzzTargetJob entity; insert_id=None disables deduping.
  rows = [
      big_query.Insert(
          row={
              'fuzzer': target_job.fuzz_target_name,
              'job': target_job.job,
              'weight': target_job.weight
          },
          insert_id=None)
      for target_job in ndb_utils.get_all_from_model(data_types.FuzzTargetJob)
  ]

  big_query.Client(dataset_id='main', table_id='fuzzer_weights').insert(rows)
def _store_probabilities_in_bigquery(data):
  """Update a bigquery table containing the daily updated probability
  distribution over strategies."""
  inserts = [
      big_query.Insert(
          row={
              'strategy_name': entry['strategy'],
              'probability': entry['bandit_weight'],
              'run_count': entry['run_count']
          },
          insert_id=None) for entry in data
  ]

  bq_client = big_query.Client(
      dataset_id='main', table_id='fuzz_strategy_probability')
  bq_client.insert(inserts)
def _store_probabilities_in_bigquery(engine, data):
  """Update a bigquery table containing the daily updated probability
  distribution over strategies."""
  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  inserts = [
      big_query.Insert(
          row={
              'strategy_name': entry['strategy'],
              'probability': entry['bandit_weight'],
              'engine': engine.name
          },
          insert_id=None) for entry in data
  ]

  # Nothing to upload — log and bail out early.
  if not inserts:
    logs.log('No fuzz strategy distribution data was found to upload to '
             'BigQuery.')
    return

  bq_client = big_query.Client(
      dataset_id='main', table_id='fuzz_strategy_probability')
  bq_client.insert(inserts)
def test_write(self):
  """Tests write."""
  # Empty response from BigQuery means the insert succeeded.
  self.client.insert.return_value = {}

  big_query.write_range('regressions', self.testcase, 'regression', 456, 789)

  # Nothing should have been logged as an error.
  self.assertEqual(0, self.mock.log_error.call_count)

  expected_row = {
      'testcase_id': str(self.testcase.key.id()),
      'crash_type': 'type',
      'crash_state': 'state',
      'security_flag': True,
      'parent_fuzzer_name': 'libfuzzer',
      'fuzzer_name': 'libfuzzer_pdf',
      'job_type': 'some_job',
      'created_at': 99,
      'regression_range_start': 456,
      'regression_range_end': 789,
  }
  self.client.insert.assert_called_once_with([
      big_query.Insert(
          row=expected_row, insert_id='%s:456:789' % self.testcase.key.id())
  ])
def test_create_many_groups(self, project_name):
  """Test creating many groups.

  Processes a batch of reproducible, unreproducible and invalid crashes and
  checks the resulting grouping, datastore writes and BigQuery inserts.
  """
  self.mock.get_project_name.return_value = project_name
  # Simulate BigQuery rejecting the first row of each insert batch.
  self.mock.insert.return_value = {'insertErrors': [{'index': 0}]}

  # A crash with .error set is invalid and should be dropped entirely.
  invalid_crash = self._make_crash('e1', state='error1')
  invalid_crash.error = 'error'

  # TODO(metzman): Add a seperate test for strategies.
  r2_stacktrace = ('r2\ncf::fuzzing_strategies: value_profile\n')

  # r1-r3 share one state, r4 is its own group; u1-u4 cover three
  # unreproducible states; invalid_crash is appended last.
  crashes = [
      self._make_crash('r1', state='reproducible1'),
      self._make_crash(r2_stacktrace, state='reproducible1'),
      self._make_crash('r3', state='reproducible1'),
      self._make_crash('r4', state='reproducible2'),
      self._make_crash('u1', state='unreproducible1'),
      self._make_crash('u2', state='unreproducible2'),
      self._make_crash('u3', state='unreproducible2'),
      self._make_crash('u4', state='unreproducible3'),
      invalid_crash
  ]

  # One entry per reproducibility check, in processing order.
  self.mock.test_for_reproducibility.side_effect = [
      False,  # For r1. It returns False. So, r1 is demoted.
      True,  # For r2. It returns True. So, r2 becomes primary for its group.
      True,  # For r4.
      False,  # For u1.
      False,  # For u2.
      False,  # For u3.
      False  # For u4.
  ]

  new_crash_count, known_crash_count, groups = fuzz_task.process_crashes(
      crashes=crashes,
      context=fuzz_task.Context(
          project_name=project_name,
          bot_name='bot',
          job_type='job',
          fuzz_target=data_types.FuzzTarget(engine='engine', binary='binary'),
          redzone=111,
          platform_id='platform',
          crash_revision=1234,
          fuzzer_name='fuzzer',
          window_argument='win_args',
          fuzzer_metadata={},
          testcases_metadata={},
          timeout_multiplier=1,
          test_timeout=2,
          thread_wait_timeout=3,
          data_directory='/data'))

  self.assertEqual(5, new_crash_count)
  self.assertEqual(3, known_crash_count)

  # Five groups: reproducible1 (r1,r2,r3), reproducible2 (r4) and one group
  # per unreproducible state; the invalid crash is discarded.
  self.assertEqual(5, len(groups))
  self.assertEqual([
      'reproducible1', 'reproducible2', 'unreproducible1', 'unreproducible2',
      'unreproducible3'
  ], [group.main_crash.crash_state for group in groups])
  self.assertEqual([True, True, True, True, True],
                   [group.is_new() for group in groups])
  self.assertEqual([3, 1, 1, 2, 1], [len(group.crashes) for group in groups])

  # One Testcase entity per group, holding the main crash's stacktrace.
  testcases = list(data_types.Testcase.query())
  self.assertEqual(5, len(testcases))
  self.assertSetEqual(
      set([r2_stacktrace, 'r4', 'u1', 'u2', 'u4']),
      set(t.crash_stacktrace for t in testcases))

  # Only r2's stacktrace carried a fuzzing-strategies annotation.
  self.assertSetEqual(
      set([
          '{"fuzzing_strategies": ["value_profile"]}', None, None, None, None
      ]), set(t.additional_metadata for t in testcases))

  # There's one invalid_crash. And r2 is a reproducible crash, so r3 doesn't
  # invoke archive_testcase_in_blobstore. Therefore, the
  # archive_testcase_in_blobstore is called `len(crashes) - 2`.
  self.assertEqual(
      len(crashes) - 2,
      self.mock.archive_testcase_and_dependencies_in_gcs.call_count)

  # Check only the desired testcases were saved.
  actual_crash_infos = [group.main_crash.crash_info for group in groups]
  if project_name != 'chromium':
    # Crash report info is only collected for chromium.
    expected_crash_infos = [None] * len(actual_crash_infos)
  else:
    expected_saved_crash_info = crash_uploader.CrashReportInfo(
        product='Chrome_' + environment.platform().lower().capitalize(),
        version='this.is.fake.ver',
        serialized_crash_stack_frames='f00df00d')
    expected_crash_infos = [
        expected_saved_crash_info,  # r2 is main crash for group r1,r2,r3
        expected_saved_crash_info,  # r4 is main crash for its own group
        None,  # u1 is not reproducible
        None,  # u2, u3 are not reproducible
        None,  # u4 is not reproducible
    ]

  self.assertEqual(len(expected_crash_infos), len(actual_crash_infos))
  for expected, actual in zip(expected_crash_infos, actual_crash_infos):
    if not expected:
      self.assertIsNone(actual)
      continue

    self.assertEqual(expected.product, actual.product)
    self.assertEqual(expected.version, actual.version)
    self.assertEqual(expected.serialized_crash_stack_frames,
                     actual.serialized_crash_stack_frames)

  def _make_big_query_json(crash, reproducible_flag, new_flag, testcase_id):
    # Build the BigQuery row expected for `crash` with the given flags.
    return {
        'crash_type': crash.crash_type,
        'crash_state': crash.crash_state,
        'created_at': 987,
        'platform': 'platform',
        'crash_time_in_ms': int(crash.crash_time * 1000),
        'parent_fuzzer_name': 'engine',
        'fuzzer_name': 'engine_binary',
        'job_type': 'job',
        'security_flag': crash.security_flag,
        'reproducible_flag': reproducible_flag,
        'revision': '1234',
        'new_flag': new_flag,
        'project': project_name,
        'testcase_id': testcase_id
    }

  def _get_testcase_id(crash):
    # Look up the saved Testcase entity matching `crash`, if any.
    rows = list(
        data_types.Testcase.query(
            data_types.Testcase.crash_type == crash.crash_type,
            data_types.Testcase.crash_state == crash.crash_state,
            data_types.Testcase.security_flag == crash.security_flag))
    if not rows:
      return None
    return str(rows[0].key.id())

  # Calls to write 5 groups of crashes to BigQuery.
  self.assertEqual(5, self.mock.insert.call_count)
  self.mock.insert.assert_has_calls([
      mock.call(mock.ANY, [
          big_query.Insert(
              _make_big_query_json(crashes[0], True, False, None),
              '%s:bot:987:0' % crashes[0].key),
          big_query.Insert(
              _make_big_query_json(crashes[1], True, True,
                                   _get_testcase_id(crashes[1])),
              '%s:bot:987:1' % crashes[0].key),
          big_query.Insert(
              _make_big_query_json(crashes[2], True, False, None),
              '%s:bot:987:2' % crashes[0].key)
      ]),
      mock.call(mock.ANY, [
          big_query.Insert(
              _make_big_query_json(crashes[3], True, True,
                                   _get_testcase_id(crashes[3])),
              '%s:bot:987:0' % crashes[3].key)
      ]),
      mock.call(mock.ANY, [
          big_query.Insert(
              _make_big_query_json(crashes[4], False, True,
                                   _get_testcase_id(crashes[4])),
              '%s:bot:987:0' % crashes[4].key)
      ]),
      mock.call(mock.ANY, [
          big_query.Insert(
              _make_big_query_json(crashes[5], False, True,
                                   _get_testcase_id(crashes[5])),
              '%s:bot:987:0' % crashes[5].key),
          big_query.Insert(
              _make_big_query_json(crashes[6], False, False, None),
              '%s:bot:987:1' % crashes[5].key)
      ]),
      mock.call(mock.ANY, [
          big_query.Insert(
              _make_big_query_json(crashes[7], False, True,
                                   _get_testcase_id(crashes[7])),
              '%s:bot:987:0' % crashes[7].key)
      ]),
  ])