Example #1
def record_cross_pollination_stats(pruner_stats, pollinator_stats,
                                   project_qualified_name, sources, tag,
                                   initial_corpus_size,
                                   minimized_corpus_size_units, method):
    """Log stats about cross pollination in BigQuery."""
    # TODO(mpherman): Find a way to collect these stats for OSS Fuzz.
    if environment.is_untrusted_worker():
        return
    # BigQuery not available in local development. This is necessary because
    # the untrusted runner is in a separate process and can't be easily mocked.
    if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
            'PY_UNITTESTS'):
        return

    if not pruner_stats or not pollinator_stats:
        return

    bigquery_row = {
        'project_qualified_name': project_qualified_name,
        'method': method,
        'sources': sources,
        'tags': tag if tag else '',
        'initial_corpus_size': initial_corpus_size,
        'corpus_size': minimized_corpus_size_units,
        'initial_edge_coverage': pruner_stats['edge_coverage'],
        'edge_coverage': pollinator_stats['edge_coverage'],
        'initial_feature_coverage': pruner_stats['feature_coverage'],
        'feature_coverage': pollinator_stats['feature_coverage']
    }

    client = big_query.Client(dataset_id='main',
                              table_id='cross_pollination_statistics')
    client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
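A minimal usage sketch with hypothetical values; the two stats dicts only need the 'edge_coverage' and 'feature_coverage' keys that the function reads:

# All names and numbers below are hypothetical.
pruner_stats = {'edge_coverage': 1500, 'feature_coverage': 4200}
pollinator_stats = {'edge_coverage': 1650, 'feature_coverage': 4650}

record_cross_pollination_stats(
    pruner_stats,
    pollinator_stats,
    project_qualified_name='example_project',
    sources='example_source',
    tag=None,
    initial_corpus_size=1000,
    minimized_corpus_size_units=800,
    method='random')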
Example #2
def _store_probabilities_in_bigquery(data):
    """Update a bigquery table containing the daily updated
  probability distribution over strategies."""
    bigquery_data = []

    # TODO(mukundv): Update once we choose a temperature parameter for final
    # implementation.
    for row in data:
        bigquery_row = {
            'strategy_name': row['strategy'],
            'probability_high_temperature':
                row['bandit_weight_high_temperature'],
            'probability_low_temperature':
                row['bandit_weight_low_temperature'],
            'probability_medium_temperature':
                row['bandit_weight_medium_temperature'],
            'run_count': row['run_count']
        }
        bigquery_data.append(big_query.Insert(row=bigquery_row,
                                              insert_id=None))

    client = big_query.Client(dataset_id='main',
                              table_id='fuzz_strategy_experiments')
    client.insert(bigquery_data)
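A sketch of the input this function expects; each row must carry the keys read in the loop above (names and numbers here are hypothetical):

data = [{
    'strategy': 'example_strategy',  # hypothetical strategy name
    'bandit_weight_high_temperature': 0.4,
    'bandit_weight_low_temperature': 0.2,
    'bandit_weight_medium_temperature': 0.3,
    'run_count': 120,
}]
_store_probabilities_in_bigquery(data)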
Example #3
def get_last_crash_time(testcase):
    """Return timestamp for last crash with same crash params as testcase."""
    client = big_query.Client()

    where_clause = ('crash_type = {crash_type} AND '
                    'crash_state = {crash_state} AND '
                    'security_flag = {security_flag} AND '
                    'project = {project}').format(
                        crash_type=json.dumps(testcase.crash_type),
                        crash_state=json.dumps(testcase.crash_state),
                        security_flag=json.dumps(testcase.security_flag),
                        project=json.dumps(testcase.project_name),
                    )

    sql = """
SELECT hour
FROM main.crash_stats
WHERE {where_clause}
ORDER by hour DESC
LIMIT 1
""".format(where_clause=where_clause)

    result = client.query(query=sql)
    if result and result.rows:
        return get_datetime(result.rows[0]['hour'])

    return None
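Note that json.dumps doubles as a string-literal escaper here: it emits a double-quoted, backslash-escaped literal that BigQuery Standard SQL also accepts, so newlines and quotes in crash parameters cannot break the query. A quick illustration:

import json

# Prints "Heap-buffer-overflow\nREAD 4" (quoted and escaped), which drops
# into the WHERE clause as a single SQL string literal.
print(json.dumps('Heap-buffer-overflow\nREAD 4'))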
Example #4
def _query_multi_armed_bandit_probabilities():
    """Get query results.

  Queries above BANDIT_PROBABILITY_QUERY and yields results
  from bigquery. This query is sorted by strategies implemented."""
    client = big_query.Client()
    return client.query(query=BANDIT_PROBABILITY_QUERY).rows
Example #5
def _get_performance_features(fuzzer_name, job_type, datetime_start,
                              datetime_end):
    """Get raw performance features stored in BigQuery."""
    query_fields = [
        fuzzer_stats.QueryField(fuzzer_stats.TestcaseQuery.ALIAS, column, None)
        for column in constants.QUERY_COLUMNS
    ]

    # TODO(mmoroz): the query should be possible for datetime objects as well.
    query = fuzzer_stats.TestcaseQuery(
        fuzzer_name=fuzzer_name,
        job_types=[job_type],
        query_fields=query_fields,
        group_by=fuzzer_stats.QueryGroupBy.GROUP_BY_NONE,
        date_start=datetime_start.date(),
        date_end=datetime_end.date())

    client = big_query.Client()

    try:
        result = client.query(query=query.build())
    except Exception as e:
        logging.error('Exception during BigQuery request: %s\n', str(e))
        raise helpers.EarlyExitException('Internal error.', 500)

    if not result.rows:
        raise helpers.EarlyExitException('No stats.', 404)

    return result
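A hypothetical call; only the .date() of each bound is used, so any datetime pair works:

import datetime

# Fuzzer and job names are hypothetical.
datetime_end = datetime.datetime.utcnow()
datetime_start = datetime_end - datetime.timedelta(days=7)
result = _get_performance_features('libFuzzer_example_fuzzer',
                                   'libfuzzer_asan_example',
                                   datetime_start, datetime_end)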
Example #6
def _record_cross_pollination_stats(stats):
    """Log stats about cross pollination in BigQuery."""
    # If no stats were gathered due to a timeout or lack of corpus, return.
    if not stats:
        return
    # BigQuery not available in local development. This is necessary because
    # the untrusted runner is in a separate process and can't be easily mocked.
    if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
            'PY_UNITTESTS'):
        return

    bigquery_row = {
        'project_qualified_name': stats.project_qualified_name,
        'method': stats.method,
        'sources': stats.sources,
        'tags': stats.tag if stats.tag else '',
        'initial_corpus_size': stats.initial_corpus_size,
        'corpus_size': stats.minimized_corpus_size_units,
        'initial_edge_coverage': stats.initial_edge_coverage,
        'edge_coverage': stats.edge_coverage,
        'initial_feature_coverage': stats.initial_feature_coverage,
        'feature_coverage': stats.feature_coverage
    }

    client = big_query.Client(dataset_id='main',
                              table_id='cross_pollination_statistics')
    client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
Example #7
    def test_multiple_page_with_limit(self):
        """Test multiple page with limit."""
        self.query.execute.return_value = self._make_resp('tok', True, 2)
        self.get_query_results.execute.side_effect = [
            self._make_resp('tok2', True, 2, 3),
            self._make_resp('tok3', True, 2, 3),
            self._make_resp('tok4', True, 2, 3),
            self._make_resp(None, True, 2, 1)
        ]

        client = big_query.Client()

        result = client.query('sql',
                              timeout=10,
                              max_results=100,
                              offset=50,
                              limit=10)

        self.assertEqual([{'t': 1}] * 10, result.rows)
        self.assertEqual(2, result.total_count)
        self.query.execute.assert_called_once_with()
        self.jobs.query.assert_called_once_with(projectId='project',
                                                body={
                                                    'query': 'sql',
                                                    'timeoutMs': 60000,
                                                    'useLegacySql': False,
                                                    'maxResults': 0
                                                })

        self.jobs.getQueryResults.assert_has_calls([
            mock.call(projectId='project',
                      jobId='job',
                      startIndex=50,
                      pageToken=None,
                      timeoutMs=60000,
                      maxResults=10),
            mock.call().execute(),
            mock.call(projectId='project',
                      jobId='job',
                      startIndex=0,
                      pageToken='tok2',
                      timeoutMs=60000,
                      maxResults=7),
            mock.call().execute(),
            mock.call(projectId='project',
                      jobId='job',
                      startIndex=0,
                      pageToken='tok3',
                      timeoutMs=60000,
                      maxResults=4),
            mock.call().execute(),
            mock.call(projectId='project',
                      jobId='job',
                      startIndex=0,
                      pageToken='tok4',
                      timeoutMs=60000,
                      maxResults=1),
            mock.call().execute()
        ])
Example #8
    def get(self):
        """Process all fuzz targets and update FuzzTargetJob weights."""
        client = big_query.Client()
        update_target_weights_for_engine(client, 'libFuzzer',
                                         LIBFUZZER_SPECIFICATIONS)
        update_target_weights_for_engine(client, 'afl', AFL_SPECIFICATIONS)

        store_current_weights_in_bigquery()
Example #9
def get(params, query, offset, limit):
    """Get the data from BigQuery."""
    sql = SQL.format(table_id='%ss' % params['type'],
                     where_clause=query.get_where_clause(),
                     prefix=params['type'],
                     offset=offset,
                     limit=limit)
    client = big_query.Client()
    result = client.query(query=sql, offset=offset, limit=limit)
    return result.rows, result.total_count
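The SQL constant is not shown above; a hypothetical template that would satisfy the .format() call (same five placeholders) might look like:

# Hypothetical template; the real SQL constant lives elsewhere in the module.
SQL = """
SELECT {prefix}_name AS name, COUNT(*) AS count
FROM main.{table_id}
WHERE {where_clause}
LIMIT {limit} OFFSET {offset}
"""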
Example #10
  def test_not_complete(self):
    """Test jobComplete=false for the first few request."""
    self.query.execute.return_value = self._make_resp(None, False, 4)
    self.get_query_results.execute.side_effect = [
        self._make_resp(None, False, 4),
        self._make_resp('tok', True, 8),
        self._make_resp(None, True, 8)
    ]

    client = big_query.Client()

    result = client.query('sql', timeout=10, max_results=100, offset=50)

    self.assertEqual([{'t': 1}, {'t': 1}], result.rows)
    self.assertEqual(8, result.total_count)
    self.query.execute.assert_called_once_with()
    self.jobs.query.assert_called_once_with(
        projectId='project',
        body={
            'query': 'sql',
            'timeoutMs': 60000,
            'useLegacySql': False,
            'maxResults': 0
        })

    self.jobs.getQueryResults.assert_has_calls([
        mock.call(
            projectId='project',
            jobId='job',
            timeoutMs=60000,
            maxResults=100,
            pageToken=None,
            startIndex=50),
        mock.call().execute(),
        mock.call(
            projectId='project',
            jobId='job',
            timeoutMs=60000,
            maxResults=100,
            pageToken=None,
            startIndex=50),
        mock.call().execute(),
        mock.call(
            projectId='project',
            jobId='job',
            pageToken='tok',
            timeoutMs=60000,
            maxResults=100,
            startIndex=0),
        mock.call().execute(),
    ])
Example #11
def store_current_weights_in_bigquery():
    """Update a BigQuery table containing the daily stats."""
    rows = []
    target_jobs = ndb_utils.get_all_from_model(data_types.FuzzTargetJob)
    for target_job in target_jobs:
        row = {
            'fuzzer': target_job.fuzz_target_name,
            'job': target_job.job,
            'weight': target_job.weight
        }
        rows.append(big_query.Insert(row=row, insert_id=None))

    client = big_query.Client(dataset_id='main', table_id='fuzzer_weights')
    client.insert(rows)
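Passing insert_id=None opts out of BigQuery's best-effort streaming de-duplication. A hypothetical variant of the append inside the loop above, using a stable id so retried inserts de-duplicate:

insert_id = '%s:%s' % (target_job.fuzz_target_name, target_job.job)
rows.append(big_query.Insert(row=row, insert_id=insert_id))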
Example #12
def get_start_hour():
    """Get the start hour from the first crash."""
    client = big_query.Client()

    sql = """
SELECT min(CAST(FLOOR(UNIX_SECONDS(created_at) / 3600) AS INT64)) as min_hour
FROM main.crashes
"""

    result = client.query(query=sql)
    if result and result.rows:
        return result.rows[0]['min_hour']

    return 0
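The query buckets crashes into hours since the Unix epoch; for reference, an equivalent of that bucketing in Python:

import datetime

def hour_index(dt):
    """Hours since the Unix epoch; matches FLOOR(UNIX_SECONDS(...) / 3600)."""
    epoch = datetime.datetime(1970, 1, 1)
    return int((dt - epoch).total_seconds() // 3600)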
Example #13
def _do_bigquery_query(query):
    """Return results from BigQuery."""
    logs.log(query)
    client = big_query.Client()

    try:
        results = client.raw_query(query, max_results=10000)
    except HttpError as e:
        raise helpers.EarlyExitException(str(e), 500)

    if 'rows' not in results:
        raise helpers.EarlyExitException('No stats.', 404)

    return results
Example #14
    def test_init(self):
        """Test __init__."""
        client = big_query.Client(dataset_id='data', table_id='table')

        self.assertEqual('built', client.client)
        self.assertEqual('project', client.project_id)
        self.assertEqual('data', client.dataset_id)
        self.assertEqual('table', client.table_id)

        # `self.mock.build.assert_called_once_with('bigquery', 'v2')` doesn't
        # work because `discovery.build` is decorated with @positional.
        # Therefore, we need the below.
        args, _ = self.mock.build.call_args
        self.assertEqual(('bigquery', 'v2'), args)
Example #15
def _store_probabilities_in_bigquery(data):
  """Update a bigquery table containing the daily updated
  probability distribution over strategies."""
  bigquery_data = []

  for row in data:
    bigquery_row = {
        'strategy_name': row['strategy'],
        'probability': row['bandit_weight'],
        'run_count': row['run_count']
    }
    bigquery_data.append(big_query.Insert(row=bigquery_row, insert_id=None))

  client = big_query.Client(
      dataset_id='main', table_id='fuzz_strategy_probability')
  client.insert(bigquery_data)
Example #16
def _query_multi_armed_bandit_probabilities():
    """Get query results.

  Queries above BANDIT_PROBABILITY_QUERY and yields results
  from bigquery. This query is sorted by strategies implemented."""
    client = big_query.Client()
    formatted_query = BANDIT_PROBABILITY_QUERY.format(
        high_temperature_query=BANDIT_PROBABILITY_SUBQUERY.format(
            temperature_type='high',
            temperature_value=HIGH_TEMPERATURE_PARAMETER),
        low_temperature_query=BANDIT_PROBABILITY_SUBQUERY.format(
            temperature_type='low',
            temperature_value=LOW_TEMPERATURE_PARAMETER),
        medium_temperature_query=BANDIT_PROBABILITY_SUBQUERY.format(
            temperature_type='medium',
            temperature_value=MEDIUM_TEMPERATURE_PARAMETER))
    return client.query(query=formatted_query).rows
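The rows returned here carry the bandit_weight_*_temperature columns that _store_probabilities_in_bigquery in Example #2 reads, so the two functions chain directly; a sketch of that wiring:

data = _query_multi_armed_bandit_probabilities()
_store_probabilities_in_bigquery(data)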
Example #17
  def test_insert(self):
    """Test calling insertAll API."""
    underlying = mock.MagicMock()
    tabledata = mock.MagicMock()
    insert_all = mock.MagicMock()

    underlying.tabledata.return_value = tabledata
    tabledata.insertAll.return_value = insert_all
    insert_all.execute.return_value = {'test': 1}
    self.mock.get_api_client.return_value = underlying

    client = big_query.Client(dataset_id='data', table_id='table')

    self.assertDictEqual(
        {'test': 1},
        client.insert([
            big_query.Insert({'a': 1}, 'prefix:0'),
            big_query.Insert({'b': 2}, 'prefix:1')
        ]))
    tabledata.insertAll.assert_called_once_with(
        projectId='project',
        datasetId='data',
        tableId='table',
        body={
            'kind': 'bigquery#tableDataInsertAllRequest',
            'rows': [
                {'json': {'a': 1}, 'insertId': 'prefix:0'},
                {'json': {'b': 2}, 'insertId': 'prefix:1'},
            ]
        })
Example #18
  def test_timeout(self):
    """Test timeout."""
    self.query.execute.return_value = self._make_resp(None, False, 3)

    self.count = 0

    def get_query_results(**unused_kwargs):
      self.count += 1
      if self.count >= 3:
        self.mock.time.return_value = 100
      return self._make_resp('tok%d' % self.count, False, 3)

    self.get_query_results.execute.side_effect = get_query_results

    client = big_query.Client()

    with self.assertRaises(Exception) as cm:
      client.query('sql', timeout=10, max_results=100, offset=50)

    self.assertEqual("Timeout: the query doesn't finish within 10 seconds.",
                     cm.exception.message)

    self.query.execute.assert_called_once_with()
    self.jobs.query.assert_called_once_with(
        projectId='project',
        body={
            'query': 'sql',
            'timeoutMs': 60000,
            'useLegacySql': False,
            'maxResults': 0
        })

    self.jobs.getQueryResults.assert_has_calls([
        mock.call(
            projectId='project',
            jobId='job',
            timeoutMs=60000,
            maxResults=100,
            pageToken=None,
            startIndex=50),
        mock.call().execute(),
    ] * 3)
Example #19
    def test_one_page(self):
        """Test one page."""
        self.query.execute.return_value = self._make_resp(None, True, 1)
        self.get_query_results.execute.return_value = self._make_resp(
            None, True, 1)
        client = big_query.Client()

        result = client.query('sql', timeout=10, max_results=100)

        self.assertEqual([{'t': 1}], result.rows)
        self.assertEqual(1, result.total_count)
        self.query.execute.assert_called_once_with()
        self.get_query_results.execute.assert_called_once_with()
        self.jobs.query.assert_called_once_with(projectId='project',
                                                body={
                                                    'query': 'sql',
                                                    'timeoutMs': 60000,
                                                    'useLegacySql': False,
                                                    'maxResults': 0
                                                })
Example #20
def _query_multi_armed_bandit_probabilities(engine):
    """Get query results.

  Queries above BANDIT_PROBABILITY_QUERY and yields results
  from bigquery. This query is sorted by strategies implemented."""
    strategy_names_list = [
        strategy_entry.name for strategy_entry in engine.query_strategy_list
    ]
    strategies_subquery = '\n'.join(
        STRATEGY_SUBQUERY_FORMAT.format(strategy_name=strategy_name)
        for strategy_name in strategy_names_list)
    client = big_query.Client()
    strategies = ','.join('strategy_' + strategy_name
                          for strategy_name in strategy_names_list)
    formatted_query = BANDIT_PROBABILITY_QUERY_FORMAT.format(
        performance_metric=engine.performance_metric,
        temperature_value=TEMPERATURE_PARAMETER,
        strategies=strategies,
        strategies_subquery=strategies_subquery,
        engine=engine.name)
    return client.query(query=formatted_query).rows
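A small illustration of the string-building above, using hypothetical strategy names:

strategy_names_list = ['corpus_subset', 'value_profile']  # hypothetical
strategies = ','.join('strategy_' + name for name in strategy_names_list)
assert strategies == 'strategy_corpus_subset,strategy_value_profile'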
Example #21
    def test_query(self):
        """Test calling query API."""
        underlying = mock.MagicMock()
        jobs = mock.MagicMock()
        query = mock.MagicMock()

        underlying.jobs.return_value = jobs
        jobs.query.return_value = query
        query.execute.return_value = {'test': 1}
        self.mock.get_api_client.return_value = underlying

        client = big_query.Client()

        self.assertDictEqual({'test': 1},
                             client.raw_query('sql', max_results=100))
        jobs.query.assert_called_once_with(projectId='project',
                                           body={
                                               'query': 'sql',
                                               'timeoutMs': 60000,
                                               'maxResults': 100,
                                               'useLegacySql': False
                                           })
Example #22
def build(end_hour):
    """Build crash stats for the end hour."""
    logging.info('Started building crash stats for %s.',
                 crash_stats.get_datetime(end_hour))
    job_id = JOB_ID_TEMPLATE.format(unique_number=int(time.time()))

    client = big_query.Client()
    make_request(client, job_id, end_hour)

    start_time = time.time()
    while (time.time() - start_time) < TIMEOUT:
        time.sleep(10)

        result = client.get_job(job_id)
        logging.info('Checking %s', json.dumps(result))

        if result['status']['state'] == 'DONE':
            if result['status'].get('errors'):
                raise Exception(json.dumps(result))
            return

    raise Exception('Building crash stats exceeded %d seconds.' % TIMEOUT)
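end_hour appears to be an hour index of the kind used by crash_stats.get_datetime (hours since the Unix epoch, as in Example #12); a hypothetical driver:

import time

# Hypothetical: build stats for the last fully elapsed hour.
build(int(time.time()) // 3600 - 1)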
Example #23
def _store_probabilities_in_bigquery(engine, data):
  """Update a BigQuery table containing the daily updated
  probability distribution over strategies."""
  bigquery_data = []

  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  for row in data:
    bigquery_row = {
        'strategy_name': row['strategy'],
        'probability': row['bandit_weight'],
        'engine': engine.name
    }
    bigquery_data.append(big_query.Insert(row=bigquery_row, insert_id=None))

  if bigquery_data:
    client = big_query.Client(
        dataset_id='main', table_id='fuzz_strategy_probability')
    client.insert(bigquery_data)
  else:
    logs.log('No fuzz strategy distribution data was found to upload to '
             'BigQuery.')
Example #24
def get(end, days, block, group_by, where_clause, group_having_clause, sort_by,
        offset, limit):
    """Query from BigQuery given the params."""
    if where_clause:
        where_clause = '(%s) AND ' % where_clause

    start = end - (days * 24) + 1

    where_clause += '(hour BETWEEN %d AND %d) AND ' % (start, end)
    where_clause += ('(_PARTITIONTIME BETWEEN TIMESTAMP_TRUNC("%s", DAY) '
                     'AND TIMESTAMP_TRUNC("%s", DAY))' %
                     (get_datetime(start).strftime('%Y-%m-%d'),
                      get_datetime(end).strftime('%Y-%m-%d')))

    time_span = 1 if block == 'hour' else 24
    remainder = get_remainder_for_index(end, time_span)

    if group_having_clause:
        group_having_clause = 'HAVING ' + group_having_clause

    sql = SQL.format(time_span=time_span,
                     remainder=remainder,
                     group_by=group_by,
                     where_clause=where_clause,
                     group_having_clause=group_having_clause,
                     sort_by=sort_by)

    client = big_query.Client()
    result = client.query(query=sql, offset=offset, limit=limit)

    items = []
    for row in result.rows:
        avg_crash_time_in_ms = row['sum_crash_time_in_ms'] // row['total_count']

        for group in row['groups']:
            for index in group['indices']:
                index['hour'] = convert_index_to_hour(index['index'],
                                                      time_span, remainder)

        items.append({
            'projectName': row['project'],
            'crashType': row['crash_type'],
            'crashState': row['crash_state'],
            'isSecurity': row['security_flag'],
            'isReproducible': row['is_reproducible'],
            'isNew': row['is_new'],
            'totalCount': row['total_count'],
            'crashTime': {
                'min': row['min_crash_time_in_ms'],
                'max': row['max_crash_time_in_ms'],
                'avg': avg_crash_time_in_ms,
                'std': math.sqrt(
                    (row['sum_square_crash_time_in_ms'] // row['total_count']) -
                    (avg_crash_time_in_ms * avg_crash_time_in_ms))
            },
            'groups': row['groups'],
            'days': days,
            'block': block,
            'end': end + 1  # Convert to UI's end.
        })
    return result.total_count, items
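The 'std' field above uses the population-variance identity Var(X) = E[X^2] - (E[X])^2, computed with integer division; a quick check with hypothetical crash times:

import math

crash_times_ms = [100, 200, 600]          # hypothetical samples
total_count = len(crash_times_ms)
avg = sum(crash_times_ms) // total_count  # 300
var = sum(t * t for t in crash_times_ms) // total_count - avg * avg
print(math.sqrt(var))  # ~216.0, the population standard deviation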