def get(self):
    """Kick off a BigQuery load job for the stats of each registered fuzzer."""
    if not big_query.get_bucket():
        logs.log_error('Loading stats to BigQuery failed: missing bucket name.')
        return

    # Snapshot all fuzzer names up front: the datastore query could expire
    # while the (potentially slow) load jobs are being created.
    client = big_query.get_api_client()
    fuzzer_names = [fuzzer.name for fuzzer in data_types.Fuzzer.query()]

    for name in fuzzer_names:
        logs.log('Loading stats to BigQuery for %s.' % name)
        self._load_data(client, name)
# Example #2
def get_gcs_stats_path(kind, fuzzer, timestamp):
    """Return the GCS directory path "/bucket/fuzzer/kind/date/<date>/" that
    holds stats of the given kind for the given fuzzer at the given timestamp.

    Returns None when no stats bucket is configured.
    """
    bucket = big_query.get_bucket()
    if not bucket:
        return None

    # The timestamp is interpreted as UTC and reduced to its calendar date,
    # formatted by the project's shared date-to-string helper.
    day = data_types.coverage_information_date_to_string(
        datetime.datetime.utcfromtimestamp(timestamp))

    return '/%s/%s/%s/date/%s/' % (bucket, fuzzer, kind, day)
def upload_stats(stats_list, filename=None):
    """Write a fuzzer run's stats into the BigQuery GCS bucket.

    Every entry in stats_list is assumed to belong to the same fuzzer/job
    run. A random JSON filename is generated when none is supplied. Runs
    that bleed past midnight are split into one upload per UTC day.
    """
    if not stats_list:
        logs.log_error("Failed to upload fuzzer stats: empty stats.")
        return

    if not isinstance(stats_list, list):
        raise AssertionError

    bucket_name = big_query.get_bucket()
    if not bucket_name:
        logs.log_error("Failed to upload fuzzer stats: missing bucket name.")
        return

    first_stat = stats_list[0]
    kind = first_stat.kind
    # Stats for individual fuzz targets are grouped under their engine name.
    fuzzer_or_engine_name = get_fuzzer_or_engine_name(first_stat.fuzzer)

    if not filename:
        # No name supplied — pick a random 64-bit hex name.
        filename = "%016x" % random.randint(0, (1 << 64) - 1) + ".json"

    def day_start(stat):
        # Midnight (UTC) of the day this stat belongs to; the grouping key
        # that splits a run which crosses a day boundary.
        return utils.utc_date_to_timestamp(
            datetime.datetime.utcfromtimestamp(stat.timestamp).date())

    # groupby requires its input ordered by the grouping key; sorting on the
    # raw timestamp also orders the day-start values.
    stats_list.sort(key=lambda stat: stat.timestamp)

    for day_timestamp, day_stats in itertools.groupby(stats_list, day_start):
        payload = "\n".join(stat.to_json() for stat in day_stats)

        gcs_path = ("gs:/" + get_gcs_stats_path(
            kind, fuzzer_or_engine_name, timestamp=day_timestamp) + filename)

        if not storage.write_data(payload, gcs_path):
            logs.log_error("Failed to upload FuzzerRun.")