def _partition_selector(self):
  """Build the _PARTITIONTIME filter covering [date_start, date_end]."""
  start_ts = int(utils.utc_date_to_timestamp(self.date_start))
  end_ts = int(utils.utc_date_to_timestamp(self.date_end))
  condition = ('(_PARTITIONTIME BETWEEN TIMESTAMP_SECONDS(%d) '
               'AND TIMESTAMP_SECONDS(%d)') % (start_ts, end_ts)
  # Callers expect a list with the single SQL condition string.
  return [condition + ')'] if False else [
      ('(_PARTITIONTIME BETWEEN TIMESTAMP_SECONDS(%d) '
       'AND TIMESTAMP_SECONDS(%d))') % (start_ts, end_ts)
  ]
def _load_data(self, bigquery, fuzzer):
  """Load yesterday's stats into BigQuery.

  Args:
    bigquery: BigQuery API service object whose jobs().insert() is used to
        submit load jobs.
    fuzzer: Name of the fuzzer whose stats are being loaded.
  """
  project_id = utils.get_application_id()

  # Stats are always loaded for the previous UTC day; the date also selects
  # the target ingestion-time partition (the '$YYYYMMDD' table decorator).
  yesterday = (self._utc_now().date() - datetime.timedelta(days=1))
  date_string = yesterday.strftime('%Y%m%d')
  timestamp = utils.utc_date_to_timestamp(yesterday)

  dataset_id = fuzzer_stats.dataset_name(fuzzer)
  if not self._create_dataset_if_needed(bigquery, dataset_id):
    return

  # One load job per stats kind; a failure to create one table does not stop
  # the remaining kinds from being loaded.
  for kind in STATS_KINDS:
    kind_name = kind.__name__
    table_id = kind_name
    if not self._create_table_if_needed(bigquery, dataset_id, table_id):
      continue

    # TestcaseRun schemas are fuzzer-specific; other kinds carry a fixed
    # schema on the kind class itself.
    if kind == fuzzer_stats.TestcaseRun:
      schema = fuzzer_stats_schema.get(fuzzer)
    else:
      schema = kind.SCHEMA

    gcs_path = fuzzer_stats.get_gcs_stats_path(kind_name, fuzzer, timestamp)

    load = {
        'destinationTable': {
            'projectId': project_id,
            # '$YYYYMMDD' targets the specific ingestion-time partition.
            'tableId': table_id + '$' + date_string,
            'datasetId': dataset_id,
        },
        # Allow new columns to appear over time without failing the load.
        'schemaUpdateOptions': [
            'ALLOW_FIELD_ADDITION',
        ],
        'sourceFormat': 'NEWLINE_DELIMITED_JSON',
        # NOTE(review): gcs_path presumably starts with '/<bucket>/...', so
        # 'gs:/' + gcs_path forms a full 'gs://bucket/...' URI — confirm
        # against fuzzer_stats.get_gcs_stats_path.
        'sourceUris': ['gs:/' + gcs_path + '*.json'],
        # Replace any previous contents of the target partition.
        'writeDisposition': 'WRITE_TRUNCATE',
    }
    if schema is not None:
      load['schema'] = schema

    job_body = {
        'configuration': {
            'load': load,
        },
    }

    logs.log("Uploading job to BigQuery.", job_body=job_body)
    request = bigquery.jobs().insert(projectId=project_id, body=job_body)
    response = request.execute()

    # We cannot really check the response here, as the query might be still
    # running, but having a BigQuery jobId in the log would make our life
    # simpler if we ever have to manually check the status of the query.
    # See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query.
    logs.log('Response from BigQuery: %s' % response)
def upload_stats(stats_list, filename=None):
  """Upload the fuzzer run to the bigquery bucket.

  Assumes that all the stats given are for the same fuzzer/job run.

  Args:
    stats_list: Non-empty list of stat objects, all for the same fuzzer/job
        run. Each must expose .kind, .fuzzer, .timestamp and .to_json().
    filename: Optional target filename; a random hex name with a '.json'
        suffix is generated when omitted.
  """
  if not stats_list:
    logs.log_error('Failed to upload fuzzer stats: empty stats.')
    return

  assert isinstance(stats_list, list)

  bucket_name = big_query.get_bucket()
  if not bucket_name:
    logs.log_error('Failed to upload fuzzer stats: missing bucket name.')
    return

  kind = stats_list[0].kind
  fuzzer = stats_list[0].fuzzer

  # Group all stats for fuzz targets.
  fuzzer_or_engine_name = get_fuzzer_or_engine_name(fuzzer)

  if not filename:
    # Generate a random filename.
    filename = '%016x' % random.randint(0, (1 << 64) - 1) + '.json'

  # Handle runs that bleed into the next day: group stats by the UTC day
  # they belong to and upload one file per day. Use the module-level
  # timestamp_start_of_day helper rather than re-defining it as a lambda
  # (PEP 8 discourages binding lambdas to names, and the duplicate could
  # drift out of sync).
  stats_list.sort(key=lambda s: s.timestamp)
  for timestamp, stats in itertools.groupby(stats_list,
                                            timestamp_start_of_day):
    upload_data = '\n'.join(stat.to_json() for stat in stats)
    day_path = 'gs:/' + get_gcs_stats_path(
        kind, fuzzer_or_engine_name, timestamp=timestamp) + filename

    if not storage.write_data(upload_data.encode('utf-8'), day_path):
      logs.log_error('Failed to upload FuzzerRun.')
def timestamp_start_of_day(s):
  """Return the UTC timestamp of midnight on the day of s.timestamp."""
  day = datetime.datetime.utcfromtimestamp(s.timestamp).date()
  return utils.utc_date_to_timestamp(day)