Example #1
0
    def _partition_selector(self):
        """Build the partition filter for this query.

        Returns a single-element list holding a SQL condition that restricts
        _PARTITIONTIME to the [self.date_start, self.date_end] range,
        expressed as epoch-second timestamps.
        """
        template = ('(_PARTITIONTIME BETWEEN TIMESTAMP_SECONDS(%d) '
                    'AND TIMESTAMP_SECONDS(%d))')
        start_seconds = int(utils.utc_date_to_timestamp(self.date_start))
        end_seconds = int(utils.utc_date_to_timestamp(self.date_end))
        return [template % (start_seconds, end_seconds)]
Example #2
0
    def _load_data(self, bigquery, fuzzer):
        """Load yesterday's stats into BigQuery.

        For each stats kind, ensures the dataset/table exist, then submits a
        BigQuery load job that reads newline-delimited JSON from GCS into the
        table's daily partition for yesterday.

        Args:
            bigquery: BigQuery API client resource (googleapiclient-style,
                presumably — TODO confirm against caller).
            fuzzer: Fuzzer name used to resolve the dataset and GCS paths.
        """
        project_id = utils.get_application_id()

        # Stats are uploaded a day behind: target yesterday's UTC date.
        yesterday = (self._utc_now().date() - datetime.timedelta(days=1))
        date_string = yesterday.strftime('%Y%m%d')
        timestamp = utils.utc_date_to_timestamp(yesterday)

        dataset_id = fuzzer_stats.dataset_name(fuzzer)
        if not self._create_dataset_if_needed(bigquery, dataset_id):
            return

        for kind in STATS_KINDS:
            kind_name = kind.__name__
            table_id = kind_name
            # Skip this kind (but keep going with the others) if the table
            # cannot be created.
            if not self._create_table_if_needed(bigquery, dataset_id,
                                                table_id):
                continue

            # TestcaseRun schemas vary per fuzzer; other kinds carry a fixed
            # schema on the class.
            if kind == fuzzer_stats.TestcaseRun:
                schema = fuzzer_stats_schema.get(fuzzer)
            else:
                schema = kind.SCHEMA

            gcs_path = fuzzer_stats.get_gcs_stats_path(kind_name, fuzzer,
                                                       timestamp)
            load = {
                'destinationTable': {
                    'projectId': project_id,
                    # The '$YYYYMMDD' suffix targets the specific daily
                    # partition of the table.
                    'tableId': table_id + '$' + date_string,
                    'datasetId': dataset_id,
                },
                'schemaUpdateOptions': [
                    'ALLOW_FIELD_ADDITION',
                ],
                'sourceFormat': 'NEWLINE_DELIMITED_JSON',
                # NOTE(review): gcs_path presumably starts with '/', so the
                # result is a 'gs://...' URI — confirm against
                # get_gcs_stats_path.
                'sourceUris': ['gs:/' + gcs_path + '*.json'],
                # Replace the partition's contents rather than appending.
                'writeDisposition': 'WRITE_TRUNCATE',
            }
            if schema is not None:
                load['schema'] = schema

            job_body = {
                'configuration': {
                    'load': load,
                },
            }

            logs.log("Uploading job to BigQuery.", job_body=job_body)
            request = bigquery.jobs().insert(projectId=project_id,
                                             body=job_body)
            response = request.execute()

            # We cannot really check the response here, as the query might be still
            # running, but having a BigQuery jobId in the log would make our life
            # simpler if we ever have to manually check the status of the query.
            # See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query.
            logs.log('Response from BigQuery: %s' % response)
Example #3
0
def upload_stats(stats_list, filename=None):
    """Upload the fuzzer run to the bigquery bucket. Assumes that all the stats
    given are for the same fuzzer/job run.

    Args:
        stats_list: Non-empty list of stat objects exposing `kind`, `fuzzer`,
            `timestamp` and `to_json()`.
        filename: Optional object name. A random 16-hex-digit '.json' name is
            generated when omitted.
    """
    if not stats_list:
        logs.log_error('Failed to upload fuzzer stats: empty stats.')
        return

    assert isinstance(stats_list, list)

    bucket_name = big_query.get_bucket()
    if not bucket_name:
        logs.log_error('Failed to upload fuzzer stats: missing bucket name.')
        return

    kind = stats_list[0].kind
    fuzzer = stats_list[0].fuzzer

    # Group all stats for fuzz targets.
    fuzzer_or_engine_name = get_fuzzer_or_engine_name(fuzzer)

    if not filename:
        # Generate a random filename.
        filename = '%016x' % random.randint(0, (1 << 64) - 1) + '.json'

    # Handle runs that bleed into the next day: key each stat by the UTC
    # start-of-day timestamp of the day it belongs to. PEP 8 (E731): a named
    # function should be a def, not a lambda bound to a name.
    def timestamp_start_of_day(stat):
        return utils.utc_date_to_timestamp(
            datetime.datetime.utcfromtimestamp(stat.timestamp).date())

    # itertools.groupby requires input sorted by the grouping key; sorting by
    # the raw timestamp suffices because start-of-day is monotonic in it.
    stats_list.sort(key=lambda s: s.timestamp)

    for timestamp, stats in itertools.groupby(stats_list,
                                              timestamp_start_of_day):
        upload_data = '\n'.join(stat.to_json() for stat in stats)

        # NOTE(review): get_gcs_stats_path presumably returns a path starting
        # with '/', producing a 'gs://...' URI — confirm.
        day_path = 'gs:/' + get_gcs_stats_path(
            kind, fuzzer_or_engine_name, timestamp=timestamp) + filename

        if not storage.write_data(upload_data.encode('utf-8'), day_path):
            logs.log_error('Failed to upload FuzzerRun.')
 def timestamp_start_of_day(s):
     return utils.utc_date_to_timestamp(
         datetime.datetime.utcfromtimestamp(s.timestamp).date())