def _load_data(self, bigquery, fuzzer):
    """Submit one BigQuery load job per stats kind for the previous day.

    "Previous day" is the date of `self._utc_now()` minus one day. The
    dataset for `fuzzer` is created if needed; if that fails nothing is
    loaded. For each entry of `STATS_KINDS` whose table exists (or could be
    created), a load job is inserted that reads the day's `*.json` files
    from GCS.

    Args:
        bigquery: BigQuery API client exposing `jobs().insert(...)`.
        fuzzer: Fuzzer identifier used to derive the dataset name, the
            schema and the GCS stats path.
    """
    app_id = utils.get_application_id()

    previous_day = self._utc_now().date() - datetime.timedelta(days=1)
    day_suffix = previous_day.strftime('%Y%m%d')
    day_timestamp = utils.utc_date_to_timestamp(previous_day)

    dataset = fuzzer_stats.dataset_name(fuzzer)
    if not self._create_dataset_if_needed(bigquery, dataset):
        return

    for stats_kind in STATS_KINDS:
        # The table is named after the stats class itself.
        table = stats_kind.__name__
        if not self._create_table_if_needed(bigquery, dataset, table):
            continue

        # TestcaseRun takes its schema from a per-fuzzer lookup; every other
        # kind carries its schema on the class.
        table_schema = (
            fuzzer_stats_schema.get(fuzzer)
            if stats_kind == fuzzer_stats.TestcaseRun
            else stats_kind.SCHEMA
        )

        stats_path = fuzzer_stats.get_gcs_stats_path(
            table, fuzzer, day_timestamp)
        source_pattern = 'gs:/' + stats_path + '*.json'

        # NOTE(review): `table$YYYYMMDD` presumably addresses that day's
        # partition of the table -- confirm against the BigQuery docs.
        destination = {
            'projectId': app_id,
            'tableId': '$'.join((table, day_suffix)),
            'datasetId': dataset,
        }
        load_config = {
            'destinationTable': destination,
            'schemaUpdateOptions': ['ALLOW_FIELD_ADDITION'],
            'sourceFormat': 'NEWLINE_DELIMITED_JSON',
            'sourceUris': [source_pattern],
            'writeDisposition': 'WRITE_TRUNCATE',
        }
        if table_schema is not None:
            load_config['schema'] = table_schema

        job_body = {'configuration': {'load': load_config}}

        logs.log("Uploading job to BigQuery.", job_body=job_body)
        response = bigquery.jobs().insert(
            projectId=app_id, body=job_body).execute()

        # The job may still be running when insert returns, so the response
        # cannot be validated here. It is logged so the BigQuery jobId is
        # available if the job ever has to be inspected by hand. See
        # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query.
        logs.log('Response from BigQuery: %s' % response)
def update_matches_for_specification(specification, client, engine,
                                     matched_specifications, run_set):
    """Query stats for `engine` and record rows that meet the threshold.

    The query text is produced by `specification.formatter` from
    `specification.query_format` and the engine's dataset name, then run
    through `_query_helper`.

    Args:
        specification: Query specification providing `formatter`,
            `query_format` and `threshold`.
        client: Client handed to `_query_helper` to run the query.
        engine: Engine name used to look up the dataset name.
        matched_specifications: Collection passed to `_update_match` for
            every row whose ratio is at least the threshold.
        run_set: Set that receives a `(fuzzer, job)` tuple for every row
            returned, whether or not it meets the threshold.
    """
    build_query = specification.formatter
    query_text = build_query(specification.query_format,
                             fuzzer_stats.dataset_name(engine))

    for row in _query_helper(client, query_text):
        fuzzer_name, job_name, ratio = row['fuzzer'], row['job'], row['ratio']

        # Every returned pair counts as having run, regardless of its ratio.
        run_set.add((fuzzer_name, job_name))

        if ratio >= specification.threshold:
            _update_match(matched_specifications, fuzzer_name, job_name,
                          specification)
def update_matches_for_specification(specification, client, engine, matches,
                                     run_set):
    """Query stats for `engine` and record rows whose weight should change.

    The query text is produced by `specification.formatter` from
    `specification.query_format` and the engine's dataset name, then run
    through `_query_helper`.

    Args:
        specification: Query specification providing `formatter`,
            `query_format` and `reason`.
        client: Client handed to `_query_helper` to run the query.
        engine: Engine name used to look up the dataset name.
        matches: Collection passed to `_update_match` together with a
            `SpecificationMatch` for every row whose `new_weight` is not 1.0.
        run_set: Set that receives a `(fuzzer, job)` tuple for every row
            returned, whether or not its weight changes.
    """
    build_query = specification.formatter
    query_text = build_query(specification.query_format,
                             fuzzer_stats.dataset_name(engine))

    for row in _query_helper(client, query_text):
        fuzzer_name, job_name = row['fuzzer'], row['job']
        weight = row['new_weight']

        # Every returned pair counts as having run, even with weight 1.0.
        run_set.add((fuzzer_name, job_name))

        # Rows with a weight of exactly 1.0 are not recorded as matches.
        if weight != 1.0:
            spec_match = SpecificationMatch(
                new_weight=weight, reason=specification.reason)
            _update_match(matches, fuzzer_name, job_name, spec_match)