def gcp(repository, revision, report, platform, suite):
    """
    Upload a grcov raw report on Google Cloud Storage
    * Compress with zstandard
    * Upload on bucket using revision in name
    * Trigger ingestion on channel's backend
    """
    assert isinstance(report, bytes)
    assert isinstance(platform, str)
    assert isinstance(suite, str)
    bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

    # Compress report
    compressor = zstd.ZstdCompressor()
    archive = compressor.compress(report)

    # Upload archive
    path = GCP_COVDIR_PATH.format(
        repository=repository, revision=revision, platform=platform, suite=suite
    )
    blob = bucket.blob(path)
    blob.upload_from_string(archive)

    # Update headers
    blob.content_type = "application/json"
    blob.content_encoding = "zstd"
    blob.patch()

    logger.info("Uploaded {} on {}".format(path, bucket))

    # Trigger ingestion on backend
    gcp_ingest(repository, revision, platform, suite)

    return blob
def gcp(repository, revision, report):
    """
    Upload a grcov raw report on Google Cloud Storage
    * Compress with zstandard
    * Upload on bucket using revision in name
    * Trigger ingestion on channel's backend
    """
    assert isinstance(report, dict)
    bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

    # Compress report
    compressor = zstd.ZstdCompressor()
    archive = compressor.compress(json.dumps(report).encode("utf-8"))

    # Upload archive
    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
    blob = bucket.blob(path)
    blob.upload_from_string(archive)

    # Update headers
    blob.content_type = "application/json"
    blob.content_encoding = "zstd"
    blob.patch()

    logger.info("Uploaded {} on {}".format(path, bucket))

    # Trigger ingestion on backend
    retry(
        lambda: gcp_ingest(repository, revision), retries=10, wait_between_retries=60
    )

    return blob
def gcp_covdir_exists(repository, revision):
    """
    Check if a covdir report exists on the Google Cloud Storage bucket
    """
    bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])
    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
    blob = bucket.blob(path)
    return blob.exists()
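# A minimal sketch of the consumer side of the `gcp` helpers above: fetch a covdir
# blob from the same bucket and undo the zstd + JSON encoding applied at upload
# time. The bucket name and blob path are placeholders; the real path is whatever
# GCP_COVDIR_PATH expands to for the repository/revision (and platform/suite).
import json

import zstandard
from google.cloud import storage


def download_covdir(bucket_name: str, blob_path: str) -> dict:
    """Download a zstd-compressed covdir report and return it as a dict."""
    client = storage.Client()
    blob = client.bucket(bucket_name).blob(blob_path)
    raw = blob.download_as_bytes()
    # Reports are stored with content_encoding="zstd", so decompress locally
    # before parsing (GCS only transcodes gzip, not zstd).
    return json.loads(zstandard.ZstdDecompressor().decompress(raw))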
def run(self):
    # Check the covdir report does not already exist
    bucket = gcp.get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])
    if uploader.gcp_covdir_exists(bucket, self.branch, self.revision, "all", "all"):
        logger.warn("Full covdir report already on GCP")
        return

    self.retrieve_source_and_artifacts()

    self.check_javascript_files()

    reports = self.build_reports()
    logger.info("Built all covdir reports", nb=len(reports))

    # Retrieve the full report
    full_path = reports.get(("all", "all"))
    assert full_path is not None, "Missing full report (all:all)"
    with open(full_path, "r") as f:
        report = json.load(f)

    # Check extensions
    paths = uploader.covdir_paths(report)
    for extension in [".js", ".cpp"]:
        assert any(
            path.endswith(extension) for path in paths
        ), "No {} file in the generated report".format(extension)

    # Upload reports on GCP
    self.upload_reports(reports)
    logger.info("Uploaded all covdir reports", nb=len(reports))

    # Upload coverage on phabricator
    changesets = self.get_hgmo_changesets()
    coverage = self.upload_phabricator(report, changesets)

    # Send an email on low coverage
    notify_email(self.revision, changesets, coverage)
    logger.info("Sent low coverage email notification")

    # Index on Taskcluster
    self.index_task(
        [
            "project.relman.code-coverage.{}.repo.mozilla-central.{}".format(
                secrets[secrets.APP_CHANNEL], self.revision
            ),
            "project.relman.code-coverage.{}.repo.mozilla-central.latest".format(
                secrets[secrets.APP_CHANNEL]
            ),
        ]
    )
def __init__(self, reports_dir=None):
    # Open redis connection
    self.redis = redis.from_url(taskcluster.secrets["REDIS_URL"])
    assert self.redis.ping(), "Redis server does not ping back"

    # Open gcp connection to bucket
    assert (
        taskcluster.secrets["GOOGLE_CLOUD_STORAGE"] is not None
    ), "Missing GOOGLE_CLOUD_STORAGE secret"
    self.bucket = get_bucket(taskcluster.secrets["GOOGLE_CLOUD_STORAGE"])

    # Local storage for reports
    self.reports_dir = reports_dir or os.path.join(
        tempfile.gettempdir(), "ccov-reports"
    )
    os.makedirs(self.reports_dir, exist_ok=True)
    logger.info("Reports will be stored in {}".format(self.reports_dir))

    # Load most recent reports in cache
    for repo in REPOSITORIES:
        for report in self.list_reports(repo, nb=1):
            self.download_report(report)
def trigger_missing(server_address: str, out_dir: str = ".") -> None:
    triggered_revisions_path = os.path.join(out_dir, "triggered_revisions.zst")

    url = f"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/project.relman.code-coverage.{secrets[secrets.APP_CHANNEL]}.cron.latest/artifacts/public/triggered_revisions.zst"  # noqa
    r = requests.head(url, allow_redirects=True)
    if r.status_code != 404:
        utils.download_file(url, triggered_revisions_path)

    try:
        dctx = zstandard.ZstdDecompressor()
        with open(triggered_revisions_path, "rb") as zf:
            with dctx.stream_reader(zf) as reader:
                with io.TextIOWrapper(reader, encoding="ascii") as f:
                    triggered_revisions = set(rev for rev in f.read().splitlines())
    except FileNotFoundError:
        triggered_revisions = set()

    # Get all mozilla-central revisions from the past year
    # (only the past 30 days on non-production channels).
    days = 365 if secrets[secrets.APP_CHANNEL] == "production" else 30
    a_year_ago = datetime.utcnow() - timedelta(days=days)
    with hgmo.HGMO(server_address=server_address) as hgmo_server:
        data = hgmo_server.get_pushes(
            startDate=a_year_ago.strftime("%Y-%m-%d"), full=False, tipsonly=True
        )

    revisions = [
        (push_data["changesets"][0], int(push_data["date"]))
        for push_data in data["pushes"].values()
    ]

    logger.info(f"{len(revisions)} pushes in the past year")

    assert (
        secrets[secrets.GOOGLE_CLOUD_STORAGE] is not None
    ), "Missing GOOGLE_CLOUD_STORAGE secret"
    bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

    missing_revisions = []
    for revision, timestamp in revisions:
        # Skip revisions that have already been triggered. If they are still missing,
        # it means there is a problem that is preventing us from ingesting them.
        if revision in triggered_revisions:
            continue

        # If the revision was already ingested, we don't need to trigger ingestion for it again.
        if uploader.gcp_covdir_exists(bucket, "mozilla-central", revision, "all", "all"):
            triggered_revisions.add(revision)
            continue

        missing_revisions.append((revision, timestamp))

    logger.info(f"{len(missing_revisions)} missing pushes in the past year")

    yesterday = int(datetime.timestamp(datetime.utcnow() - timedelta(days=1)))

    task_group_id = slugId()
    logger.info(f"Triggering tasks in the {task_group_id} group")
    triggered = 0
    for revision, timestamp in reversed(missing_revisions):
        # If it's older than yesterday, we assume the push's task group finished.
        # If it is newer than yesterday, we load the group and check that all
        # coverage tasks in it finished.
        if timestamp > yesterday:
            decision_task_id = taskcluster.get_decision_task(
                "mozilla-central", revision
            )
            if decision_task_id is None:
                continue

            group = taskcluster.get_task_details(decision_task_id)["taskGroupId"]
            if not all(
                task["status"]["state"] in taskcluster.FINISHED_STATUSES
                for task in taskcluster.get_tasks_in_group(group)
                if taskcluster.is_coverage_task(task["task"])
            ):
                continue

        trigger_task(task_group_id, revision)
        triggered_revisions.add(revision)
        triggered += 1
        if triggered == MAXIMUM_TRIGGERS:
            break

    cctx = zstandard.ZstdCompressor(threads=-1)
    with open(triggered_revisions_path, "wb") as zf:
        with cctx.stream_writer(zf) as compressor:
            with io.TextIOWrapper(compressor, encoding="ascii") as f:
                f.write("\n".join(triggered_revisions))
def generate(server_address: str, repo_dir: str, out_dir: str = ".") -> None:
    start_time = time.monotonic()

    commit_coverage_path = os.path.join(out_dir, "commit_coverage.json.zst")

    assert (
        secrets[secrets.GOOGLE_CLOUD_STORAGE] is not None
    ), "Missing GOOGLE_CLOUD_STORAGE secret"
    bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

    blob = bucket.blob("commit_coverage.json.zst")
    if blob.exists():
        dctx = zstandard.ZstdDecompressor()
        commit_coverage = json.loads(dctx.decompress(blob.download_as_bytes()))
    else:
        commit_coverage = {}

    cctx = zstandard.ZstdCompressor(threads=-1)

    def _upload():
        blob = bucket.blob("commit_coverage.json.zst")
        blob.upload_from_string(
            cctx.compress(json.dumps(commit_coverage).encode("ascii"))
        )
        blob.content_type = "application/json"
        blob.content_encoding = "zstd"
        blob.patch()

    # We are only interested in "overall" coverage, not platform or suite specific.
    changesets_to_analyze = [
        changeset
        for changeset, platform, suite in list_reports(bucket, "mozilla-central")
        if platform == DEFAULT_FILTER and suite == DEFAULT_FILTER
    ]

    # Skip already analyzed changesets.
    changesets_to_analyze = [
        changeset
        for changeset in changesets_to_analyze
        if changeset not in commit_coverage
    ]

    # Use the local server to generate the coverage mapping, as it is faster and
    # correct.
    def analyze_changeset(changeset_to_analyze: str) -> None:
        report_name = get_name(
            "mozilla-central", changeset_to_analyze, DEFAULT_FILTER, DEFAULT_FILTER
        )
        assert download_report(
            os.path.join(out_dir, "ccov-reports"), bucket, report_name
        )
        with open(
            os.path.join(out_dir, "ccov-reports", f"{report_name}.json"), "r"
        ) as f:
            report = json.load(f)

        phabricatorUploader = PhabricatorUploader(
            repo_dir, changeset_to_analyze, warnings_enabled=False
        )

        # Use the hg.mozilla.org server to get the automation relevant changesets,
        # since this information is broken in our local repo (which is mozilla-unified).
        with hgmo.HGMO(server_address=server_address) as hgmo_remote_server:
            changesets = hgmo_remote_server.get_automation_relevance_changesets(
                changeset_to_analyze
            )

        results = phabricatorUploader.generate(thread_local.hg, report, changesets)
        for changeset in changesets:
            # Lookup changeset coverage from phabricator uploader
            coverage = results.get(changeset["node"])
            if coverage is None:
                logger.info("No coverage found", changeset=changeset)
                commit_coverage[changeset["node"]] = None
                continue

            commit_coverage[changeset["node"]] = {
                "added": sum(c["lines_added"] for c in coverage["paths"].values()),
                "covered": sum(c["lines_covered"] for c in coverage["paths"].values()),
                "unknown": sum(c["lines_unknown"] for c in coverage["paths"].values()),
            }

    max_workers = min(32, (os.cpu_count() or 1) + 4)
    logger.info(
        f"Analyzing {len(changesets_to_analyze)} changesets with {max_workers} workers"
    )
    with ThreadPoolExecutorResult(
        initializer=_init_thread, initargs=(repo_dir,)
    ) as executor:
        futures = [
            executor.submit(analyze_changeset, changeset)
            for changeset in changesets_to_analyze
        ]
        for changeset, future in tqdm(
            zip(changesets_to_analyze, concurrent.futures.as_completed(futures)),
            total=len(futures),
        ):
            exc = future.exception()
            if exc is not None:
                logger.error(f"Exception {exc} while analyzing {changeset}")

            # Periodically (every ~10 minutes) upload the partial results.
            if time.monotonic() - start_time >= 600:
                _upload()
                start_time = time.monotonic()

    while len(hg_servers) > 0:
        hg_server = hg_servers.pop()
        hg_server.close()

    _upload()

    with open(commit_coverage_path, "wb") as zf:
        with cctx.stream_writer(zf) as compressor:
            with io.TextIOWrapper(compressor, encoding="ascii") as f:
                json.dump(commit_coverage, f)
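# A minimal sketch of how a consumer could read the commit_coverage.json.zst blob
# produced by generate() above: decompress it and look up the per-changeset
# {"added", "covered", "unknown"} line counts. The bucket name is a placeholder;
# the blob name matches the one used in generate().
import json

import zstandard
from google.cloud import storage


def lookup_commit_coverage(bucket_name: str, node: str):
    """Return the coverage summary for a changeset node, or None if unknown."""
    blob = storage.Client().bucket(bucket_name).blob("commit_coverage.json.zst")
    commit_coverage = json.loads(
        zstandard.ZstdDecompressor().decompress(blob.download_as_bytes())
    )
    # Entries are stored as None when no coverage was found for the changeset.
    return commit_coverage.get(node)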
def run(self):
    # Check the covdir report does not already exist
    bucket = gcp.get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])
    if uploader.gcp_covdir_exists(bucket, self.branch, self.revision, "all", "all"):
        logger.warn("Full covdir report already on GCP")
        return

    # Generate and upload the full report as soon as possible, so it is available
    # for consumers (e.g. Searchfox) right away.
    self.retrieve_source_and_artifacts()
    reports = self.build_reports(only=[("all", "all")])
    full_path = reports.get(("all", "all"))
    assert full_path is not None, "Missing full report (all:all)"
    with open(full_path, "r") as f:
        report_text = f.read()

    # Upload report as an artifact.
    taskcluster_config.upload_artifact(
        "public/code-coverage-report.json",
        report_text,
        "application/json",
        timedelta(days=14),
    )

    # Index on Taskcluster
    self.index_task(
        [
            "project.relman.code-coverage.{}.repo.mozilla-central.{}".format(
                secrets[secrets.APP_CHANNEL], self.revision
            ),
            "project.relman.code-coverage.{}.repo.mozilla-central.latest".format(
                secrets[secrets.APP_CHANNEL]
            ),
        ]
    )

    report = json.loads(report_text)

    # Check extensions
    paths = uploader.covdir_paths(report)
    for extension in [".js", ".cpp"]:
        assert any(
            path.endswith(extension) for path in paths
        ), "No {} file in the generated report".format(extension)

    # Upload coverage on phabricator
    changesets = self.get_hgmo_changesets()
    coverage = self.upload_phabricator(report, changesets)

    # Send an email on low coverage
    notify_email(self.revision, changesets, coverage)
    logger.info("Sent low coverage email notification")

    self.check_javascript_files()

    # Generate all reports except the full one which we generated earlier.
    all_report_combinations = self.artifactsHandler.get_combinations()
    del all_report_combinations[("all", "all")]
    reports.update(self.build_reports(only=all_report_combinations))
    logger.info("Built all covdir reports", nb=len(reports))

    # Upload reports on GCP
    self.upload_reports(reports)
    logger.info("Uploaded all covdir reports", nb=len(reports))
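# A minimal sketch showing how the artifact and index routes registered by run()
# above could be consumed. The index URL pattern mirrors the one used in
# trigger_missing() for triggered_revisions.zst; the channel value is an assumption
# (e.g. "production") and should match the deployment being queried.
import requests


def fetch_latest_full_report(channel: str = "production") -> dict:
    """Fetch the latest full covdir report via the Taskcluster index."""
    route = f"project.relman.code-coverage.{channel}.repo.mozilla-central.latest"
    url = (
        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/"
        f"{route}/artifacts/public/code-coverage-report.json"
    )
    r = requests.get(url, allow_redirects=True)
    r.raise_for_status()
    # The artifact is uploaded as plain application/json, so it parses directly.
    return r.json()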