def query_data(repo_meta, commit):
    """
    Find the right event base sha to get the right list of commits.

    This is not an issue in GithubPushTransformer because the PushEvent
    from Taskcluster already contains the data.
    """
    # This is used for the `compare` API. The "event.base.sha" is only contained
    # in Pulse events, thus, we need to determine the correct value.
    event_base_sha = repo_meta["branch"]
    # First we try with `master` being the base sha
    # e.g. https://api.github.com/repos/servo/servo/compare/master...1418c0555ff77e5a3d6cf0c6020ba92ece36be2e
    compareResponse = github.compare_shas(
        repo_meta["owner"], repo_meta["repo"], repo_meta["branch"], commit
    )
    merge_base_commit = compareResponse.get("merge_base_commit")
    if merge_base_commit:
        committer_date = merge_base_commit["commit"]["committer"]["date"]
        # Since we don't use PushEvents that contain the "before" or "event.base.sha" fields [1],
        # we need to discover the right parent which existed in the base branch.
        # [1] https://github.com/taskcluster/taskcluster/blob/3dda0adf85619d18c5dcf255259f3e274d2be346/services/github/src/api.js#L55
        parents = merge_base_commit["parents"]
        if len(parents) == 1:
            parent = parents[0]
            commit_info = fetch_json(parent["url"])
            parent_committer_date = commit_info["commit"]["committer"]["date"]
            # All commits involved in a PR share the same committer's date
            if committer_date == parent_committer_date:
                # Recursively find the forking parent
                event_base_sha, _ = query_data(repo_meta, parent["sha"])
            else:
                event_base_sha = parent["sha"]
        else:
            for parent in parents:
                _commit = fetch_json(parent["url"])
                # All commits involved in a merge share the same committer's date
                if committer_date != _commit["commit"]["committer"]["date"]:
                    event_base_sha = _commit["sha"]
                    break
    # This is to make sure that the value has changed
    assert event_base_sha != repo_meta["branch"]
    logger.info("We have a new base: %s", event_base_sha)
    # When using the correct event_base_sha the "commits" field will be correct
    compareResponse = github.compare_shas(
        repo_meta["owner"], repo_meta["repo"], event_base_sha, commit
    )
    commits = []
    for _commit in compareResponse["commits"]:
        commits.append({
            "message": _commit["commit"]["message"],
            "author": _commit["commit"]["author"],
            "committer": _commit["commit"]["committer"],
            "id": _commit["sha"],
        })
    return event_base_sha, commits
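# A minimal sketch of how the compare call above behaves, hitting the public
# GitHub REST API directly with `requests` rather than the project's `github`
# helper. The endpoint and the "merge_base_commit"/"commits" fields are part
# of GitHub's documented compare API; the function name, header choice, and
# sample shas below are illustrative assumptions.
import requests

def compare_shas_sketch(owner, repo, base, head):
    url = "https://api.github.com/repos/{}/{}/compare/{}...{}".format(
        owner, repo, base, head
    )
    response = requests.get(url, headers={"Accept": "application/vnd.github.v3+json"})
    response.raise_for_status()
    return response.json()

# e.g. the URL quoted in the comment inside query_data():
# data = compare_shas_sketch("servo", "servo", "master", "1418c0555ff77e5a3d6cf0c6020ba92ece36be2e")
# data["merge_base_commit"]["parents"] and data["commits"] are the fields
# query_data() walks to find the forking parent.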
def extract(self, url):
    try:
        return fetch_json(url)
    except requests.exceptions.HTTPError as e:
        logger.warning("HTTPError %s fetching: %s", e.response.status_code, url)
        raise
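# `fetch_json` is an external helper used throughout these functions. A hedged
# sketch of the behaviour extract() relies on: it must raise
# requests.exceptions.HTTPError on non-2xx responses, which `raise_for_status()`
# provides. This is an assumption about the helper, not its actual implementation.
import requests

def fetch_json(url, params=None):
    response = requests.get(url, params=params, headers={"Accept": "application/json"})
    response.raise_for_status()  # raises requests.exceptions.HTTPError on 4xx/5xx
    return response.json()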
def fetch_push(self, url, repository):
    params = {}
    params.update(self.CREDENTIALS)
    logger.info("Fetching push details: %s", url)
    commits = self.get_cleaned_commits(fetch_json(url, params))
    head_commit = commits[-1]
    push = {
        "revision": head_commit["sha"],
        # A push can be co-authored.
        # The author's date is when the code was committed locally by the author;
        # the committer's date is when the PR was merged (committed) into master.
        "push_timestamp": to_timestamp(head_commit["commit"]["committer"]["date"]),
        # We want the original author's email to show up in the UI
        "author": head_commit["commit"]["author"]["email"],
    }

    revisions = []
    for commit in commits:
        revisions.append({
            "comment": commit["commit"]["message"],
            "author": u"{} <{}>".format(
                commit["commit"]["author"]["name"], commit["commit"]["author"]["email"]
            ),
            "revision": commit["sha"],
        })

    push["revisions"] = revisions
    return push
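# `to_timestamp` is assumed above; GitHub returns ISO 8601 committer dates such
# as "2020-01-15T12:34:56Z", while the push record wants a Unix epoch. A hedged
# sketch of what such a helper could look like (an assumption, not the project's
# actual implementation):
from datetime import datetime, timezone

def to_timestamp(iso_date):
    # "2020-01-15T12:34:56Z" -> 1579091696
    return int(
        datetime.strptime(iso_date, "%Y-%m-%dT%H:%M:%SZ")
        .replace(tzinfo=timezone.utc)
        .timestamp()
    )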
def fetch_data(self):
    url = 'https://bugzilla.mozilla.org/latest/configuration'
    product_security_group_data = None
    exception = None
    try:
        product_security_group_data = fetch_json(url)
    except Exception as e:
        exception = e

    return {
        "url": url,
        "product_security_group_data": product_security_group_data,
        "exception": exception,
    }
def fetch_data(self, project):
    url = (
        'https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/'
        'gecko.v2.%s.latest.source.source-bugzilla-info/artifacts/public/components.json'
        % project
    )
    files_bugzilla_data = None
    exception = None
    try:
        files_bugzilla_data = fetch_json(url)
    except Exception as e:
        exception = e

    return {
        "url": url,
        "files_bugzilla_data": files_bugzilla_data,
        "exception": exception,
    }
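# Both fetch_data() variants above capture the exception instead of raising, so
# a caller must check the "exception" slot before trusting the payload. A hedged
# usage sketch; the `importer` instance and `process` function are hypothetical:
result = importer.fetch_data("mozilla-central")
if result["exception"] is not None:
    logger.error("Failed to fetch %s: %s", result["url"], result["exception"])
else:
    process(result["files_bugzilla_data"])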
def query_latest_gecko_decision_task_id(project):
    url = TASKCLUSTER_INDEX_URL % project
    logger.info('Fetching %s', url)
    try:
        latest_task = fetch_json(url)
        task_id = latest_task['taskId']
        logger.info('For %s we found the task id: %s', project, task_id)
    except requests.exceptions.HTTPError as e:
        # Specifically handle 404 errors, as it means there's no decision task on this push
        if e.response.status_code == 404:
            logger.info('For %s we did not find a task id', project)
            task_id = None
        else:
            raise
    return task_id
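# TASKCLUSTER_INDEX_URL is defined elsewhere in the module. A plausible value,
# modeled on the Taskcluster index route naming already used in the
# components.json URL above, might look like this (an assumption, not the
# verified constant):
TASKCLUSTER_INDEX_URL = (
    'https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/'
    'gecko.v2.%s.latest.taskgraph.decision'
)

# task_id = query_latest_gecko_decision_task_id('autoland')  # None if no decision task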
def fetch_intermittent_bugs(offset, limit):
    url = settings.BZ_API_URL + '/rest/bug'
    params = {
        'keywords': 'intermittent-failure',
        'chfieldfrom': '-1y',
        'include_fields': (
            'id,summary,status,resolution,op_sys,cf_crash_signature,'
            'keywords,last_change_time,whiteboard'
        ),
        'offset': offset,
        'limit': limit,
    }
    response = fetch_json(url, params=params)
    return response.get('bugs', [])
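# The offset/limit pair exists for paging through Bugzilla's REST API. A hedged
# sketch of how a caller might drain all pages; the function name and page size
# are illustrative choices:
def fetch_all_intermittent_bugs(page_size=500):
    bugs = []
    offset = 0
    while True:
        page = fetch_intermittent_bugs(offset, page_size)
        bugs.extend(page)
        if len(page) < page_size:  # a short page means we've reached the end
            break
        offset += page_size
    return bugs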
def _taskcluster_runnable_jobs(project):
    decision_task_id = query_latest_gecko_decision_task_id(project)
    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not decision_task_id:
        return []

    for run_number in range(5):
        tc_graph_url = RUNNABLE_JOBS_URL.format(task_id=decision_task_id, run_number=run_number)
        validate = URLValidator()
        try:
            validate(tc_graph_url)
        except ValidationError:
            logger.warning('Failed to validate %s', tc_graph_url)
            return []

        try:
            tc_graph = fetch_json(tc_graph_url)
        except requests.exceptions.HTTPError as e:
            logger.info(
                'HTTPError %s when getting taskgraph at %s',
                e.response.status_code, tc_graph_url,
            )
            continue

        return [
            {
                'build_platform': node.get('platform', ''),
                'build_system_type': 'taskcluster',
                'job_group_name': node.get('groupName', ''),
                'job_group_symbol': node.get('groupSymbol', ''),
                'job_type_name': label,
                'job_type_symbol': node['symbol'],
                'platform': node.get('platform'),
                'platform_option': ' '.join(node.get('collection', {}).keys()),
                'ref_data_name': label,
                'state': 'runnable',
                'result': 'runnable',
            }
            for label, node in tc_graph.items()
        ]

    # All five runs errored out; fall back to no runnable jobs
    return []
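# RUNNABLE_JOBS_URL is a module-level constant; the loop above probes runs 0-4
# of the decision task because the artifact may only exist on a retried run. A
# plausible shape for the constant, following Taskcluster's queue artifact URL
# scheme (an assumption, not the verified value):
RUNNABLE_JOBS_URL = (
    'https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/{task_id}'
    '/runs/{run_number}/artifacts/public/runnable-jobs.json'
)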
def fetch_intermittent_bugs(additional_params, limit, duplicate_chain_length):
    url = settings.BZ_API_URL + '/rest/bug'
    params = {
        'include_fields': ','.join([
            'id',
            'summary',
            'status',
            'resolution',
            'dupe_of',
            'duplicates',
            'cf_crash_signature',
            'keywords',
            'last_change_time',
            'whiteboard',
        ]),
        'limit': limit,
    }
    params.update(additional_params)
    response = fetch_json(url, params=params)
    return response.get('bugs', [])
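# This variant pushes the query predicates into `additional_params`, so the same
# function can serve both the keyword search and the duplicate-chain lookups
# hinted at by the dupe_of/duplicates fields. A hedged usage sketch; the
# parameter values are illustrative, and Bugzilla's REST API accepts a
# comma-separated `id` list:
first_pass = fetch_intermittent_bugs(
    {'keywords': 'intermittent-failure', 'offset': 0}, 500, 0
)
dupe_targets = [str(bug['dupe_of']) for bug in first_pass if bug.get('dupe_of')]
if dupe_targets:
    # Resolve one level of the duplicate chain by fetching the target bugs by id
    second_pass = fetch_intermittent_bugs({'id': ','.join(dupe_targets)}, 500, 1)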
def fetch_push(self, url, repository, sha=None):
    newrelic.agent.add_custom_parameter("sha", sha)

    logger.debug("fetching for %s %s", repository, url)
    # there will only ever be one, with this url
    push = list(fetch_json(url)["pushes"].values())[0]

    commits = []
    # we only want to ingest the last 200 commits for each push,
    # to protect against the 5000+ commit merges on release day uplift.
    for commit in push['changesets'][-200:]:
        commits.append({
            "revision": commit["node"],
            "author": commit["author"],
            "comment": commit["desc"],
        })

    return {
        "revision": commits[-1]["revision"],
        "author": push["user"],
        "push_timestamp": push["date"],
        "revisions": commits,
    }
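# A hedged sketch of the hg.mozilla.org json-pushes payload the function above
# unpacks. The exact URL is illustrative; querying a single changeset with
# version=2 and full=1 yields one entry under "pushes", keyed by push id, whose
# changesets carry the node/author/desc fields read above:
#
# https://hg.mozilla.org/mozilla-central/json-pushes/?version=2&full=1&changeset=<sha>
#
# {
#     "pushes": {
#         "12345": {
#             "changesets": [
#                 {"node": "<40-char sha>", "author": "a@example.com", "desc": "Bug N - ..."}
#             ],
#             "date": 1579091696,
#             "user": "sheriff@example.com"
#         }
#     }
# }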
def last_push_id_from_server(repo):
    """Obtain the last push ID from a ``Repository`` instance."""
    url = '%s/json-pushes/?version=2' % repo.url
    data = fetch_json(url)
    return data['lastpushid']
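# A minimal usage sketch; ``Repository`` normally comes from the ORM, so a
# simple stand-in with a `url` attribute is used here for illustration:
from types import SimpleNamespace

repo = SimpleNamespace(url='https://hg.mozilla.org/mozilla-central')
# last_id = last_push_id_from_server(repo)  # reads data['lastpushid']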