def from_issue(cls, issue):
  """Construct TriageInfo from the supplied issue.

  Args:
    issue: A dict representing a GitHub issue as returned by the GraphQL
      API; must contain "labels" and "projectCards" edge lists.

  Returns:
    A TriageInfo whose missing_* flags are cleared for each triage
    requirement the issue already satisfies, and whose
    triage_project_card is set to the issue's card in the triage
    project (if any).
  """
  info = TriageInfo()
  info.issue = issue
  labels = graphql.unpack_and_split_nodes(issue, ["labels", "edges"])
  project_cards = graphql.unpack_and_split_nodes(
      issue, ["projectCards", "edges"])

  for label in labels:
    name = label["name"]
    if name in ALLOWED_KINDS:
      info.missing_kind = False
    if name in ALLOWED_PRIORITY:
      info.missing_priority = False
      # Some priorities additionally require the issue to be in a project;
      # for those, membership in any project satisfies the requirement.
      if name not in REQUIRES_PROJECT or project_cards:
        info.missing_project = False
    if name.startswith("area") or name.startswith("community"):
      info.missing_area = False

  # Record the card (if any) that places this issue in the triage project.
  for card in project_cards:
    if card.get("project").get("name") == TRIAGE_PROJECT:
      info.triage_project_card = card
      break
  return info
def from_issue(cls, issue):
  """Construct TriageInfo from the supplied issue.

  Args:
    issue: A dict representing a GitHub issue as returned by the GraphQL
      API; expected to contain "labels", "projectCards" and
      "timelineItems" edge lists and, for closed issues, "closedAt".
  """
  info = TriageInfo()
  info.issue = issue
  labels = graphql.unpack_and_split_nodes(issue, ["labels", "edges"])
  project_cards = graphql.unpack_and_split_nodes(
      issue, ["projectCards", "edges"])
  events = graphql.unpack_and_split_nodes(issue, ["timelineItems", "edges"])

  # Whether a project assignment is required depends on which priority
  # label the issue carries (REQUIRES_PROJECT is keyed by label name).
  for l in labels:
    name = l["name"]
    if name in ALLOWED_PRIORITY:
      info.requires_project = name in REQUIRES_PROJECT

  # Remember the card that places this issue in the triage project, if any.
  for c in project_cards:
    if c.get("project").get("name") == TRIAGE_PROJECT:
      info.triage_project_card = c
      break

  # TODO(jlewi): Could we potentially miss some events since we aren't
  # paginating through all events for an issue? This should no longer
  # be an issue because _process_issue will call _get_issue and paginate
  # through all results.
  #
  # Walk the timeline to record WHEN each triage requirement was first
  # satisfied; only the earliest matching event of each category is kept
  # (the `continue`s below skip events once the timestamp is already set).
  for e in events:
    if not "createdAt" in e:
      continue
    t = dateutil_parser.parse(e.get("createdAt"))

    if e.get("__typename") == "LabeledEvent":
      name = e.get("label").get("name")

      if name.startswith("kind"):
        # NOTE(review): `continue` skips the remaining label checks for
        # this event too; safe because a LabeledEvent carries one label,
        # which presumably matches at most one category — verify.
        if info.kind_time:
          continue
        info.kind_time = t

      if name.startswith("area") or name.startswith("platform"):
        if info.area_time:
          continue
        info.area_time = t

      if name in ALLOWED_PRIORITY:
        if info.priority_time:
          continue
        info.priority_time = t

    if e.get("__typename") == "AddedToProjectEvent":
      if info.project_time:
        continue
      info.project_time = t

  # Record when the issue was closed (if it has been).
  if issue.get("closedAt"):
    info.closed_at = dateutil_parser.parse(issue.get("closedAt"))
  return info
def update_kanban_board(self):
  """Checks if any issues in the needs triage board can be removed.

  Runs a GitHub GraphQL search for open issues in the kubeflow org that
  are in project kubeflow/26 (the triage board), pages through all
  results 100 at a time, and hands each issue to _process_issue.
  """
  # The search query fetches, per issue: author, basic metadata, up to 30
  # labels, up to 30 project cards, and up to 30 timeline items (project
  # additions, labeled events, closed events) — the fields TriageInfo uses.
  query = """query getIssues($issueCursor: String) {
search(type: ISSUE, query: "is:open is:issue org:kubeflow project:kubeflow/26", first: 100, after: $issueCursor) {
  issueCount
  pageInfo {
    endCursor
    hasNextPage
  }
  edges {
    node {
      __typename
      ... on Issue {
        author {
          __typename
          ... on User {
            login
          }
          ... on Bot {
            login
          }
        }
        id
        title
        body
        url
        state
        createdAt
        closedAt
        labels(first: 30) {
          totalCount
          edges {
            node {
              name
            }
          }
        }
        projectCards(first: 30) {
          totalCount
          pageInfo {
            endCursor
            hasNextPage
          }
          edges {
            node {
              id
              project {
                name
                number
              }
            }
          }
        }
        timelineItems(first: 30) {
          totalCount
          pageInfo {
            endCursor
            hasNextPage
          }
          edges {
            node {
              __typename
              ... on AddedToProjectEvent {
                createdAt
              }
              ... on LabeledEvent {
                createdAt
                label {
                  name
                }
              }
              ... on ClosedEvent {
                createdAt
              }
            }
          }
        }
      }
    }
  }
}
}
"""

  # Cursor-based pagination over the search results.
  issues_cursor = None
  has_next_issues_page = True

  while has_next_issues_page:
    variables = {
      "issueCursor": issues_cursor,
    }
    results = self.client.run_query(query, variables=variables)

    if results.get("errors"):
      # Abort on any GraphQL error; nothing has been mutated at this point.
      message = json.dumps(results.get("errors"))
      logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
      return

    issues = graphql.unpack_and_split_nodes(
      results, ["data", "search", "edges"])

    for i in issues:
      self._process_issue(i)

    # Advance to the next page (if any).
    page_info = results["data"]["search"]["pageInfo"]
    issues_cursor = page_info["endCursor"]
    has_next_issues_page = page_info["hasNextPage"]
def _iter_issues(self, org, repo, issue_filter=None, output=None):
  """Iterate over issues in batches for a repository.

  Args:
    org: The org that owns the repository.
    repo: The directory for the repository.
    issue_filter: Used to filter issues to consider based on when they
      were last updated; an IssueFilters dict (e.g.
      {"since": <ISO timestamp>}). Defaults to issues updated within the
      last 24 weeks.
    output: The directory to write the results; if not specified results
      are not downloaded.

  Yields:
    One list of issue dicts per page of results.

  Writes the issues along with the first comments to a file in output
  directory.
  """
  client = graphql.GraphQLClient()
  num_issues_per_page = 100

  # Labels and projects are available via timeline events.
  # However, in timeline events project info (e.g. actual project name)
  # is only in developer preview.
  # The advantage of using labels and projectCards (as opposed to timeline
  # events) is that its much easier to bound the number of items we need
  # to fetch in order to return all labels and projects
  # for timeline items its much more likely the labels and projects we care
  # about will require pagination.
  #
  # TODO(jlewi): We should add a method to fetch all issue timeline items
  # via pagination in the case the number of items exceeds the page size.
  #
  # TODO(jlewi): We need to consider closed issues if we want to compute
  # stats.
  #
  # TODO(jlewi): We should support fetching only OPEN issues; if we are
  # deciding which issues need triage or have been triaged we really only
  # need to look at open issues. Closed Issues will automatically move to
  # the appropriate card in the Kanban board.
  query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String, $filter: IssueFilters) {
repository(owner: $org, name: $repo) {
  issues(first: $pageSize, filterBy: $filter, after: $issueCursor) {
    totalCount
    pageInfo {
      endCursor
      hasNextPage
    }
    edges {
      node {
        author {
          __typename
          ... on User {
            login
          }
          ... on Bot {
            login
          }
        }
        id
        title
        body
        url
        state
        createdAt
        closedAt
        labels(first: 30) {
          totalCount
          edges {
            node {
              name
            }
          }
        }
        projectCards(first: 30) {
          totalCount
          pageInfo {
            endCursor
            hasNextPage
          }
          edges {
            node {
              id
              project {
                name
                number
              }
            }
          }
        }
        timelineItems(first: 30) {
          totalCount
          pageInfo {
            endCursor
            hasNextPage
          }
          edges {
            node {
              __typename
              ... on AddedToProjectEvent {
                createdAt
              }
              ... on LabeledEvent {
                createdAt
                label {
                  name
                }
              }
              ... on ClosedEvent {
                createdAt
              }
            }
          }
        }
      }
    }
  }
}
}
"""

  num_pages = None
  if output and not os.path.exists(output):
    os.makedirs(output)

  total_issues = None
  has_next_issues_page = True
  # TODO(jlewi): We should persist the cursors to disk so we can resume
  # after errors
  issues_cursor = None
  shard_writer = None

  if not issue_filter:
    # Default filter: only issues updated within the last 24 weeks.
    start_time = datetime.datetime.now() - datetime.timedelta(weeks=24)
    issue_filter = {
      "since": start_time.isoformat(),
    }

  while has_next_issues_page:
    variables = {
      "org": org,
      "repo": repo,
      "pageSize": num_issues_per_page,
      "issueCursor": issues_cursor,
      "filter": issue_filter,
    }
    results = client.run_query(query, variables=variables)

    if results.get("errors"):
      # Abort on any GraphQL error.
      message = json.dumps(results.get("errors"))
      logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
      return

    if not total_issues:
      # First page: learn the total count so we can size the shard writer.
      total_issues = results["data"]["repository"]["issues"]["totalCount"]
      num_pages = int(np.ceil(total_issues / float(num_issues_per_page)))
      logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

    if output and not shard_writer:
      logging.info("initializing the shard writer")
      shard_writer = graphql.ShardWriter(
        num_pages, output, prefix="issues-{0}-{1}".format(org, repo))

    issues = graphql.unpack_and_split_nodes(
      results, ["data", "repository", "issues", "edges"])
    yield issues

    if shard_writer:
      shard_writer.write_shard(issues)

    page_info = results["data"]["repository"]["issues"]["pageInfo"]
    issues_cursor = page_info["endCursor"]
    has_next_issues_page = page_info["hasNextPage"]
def _iter_issues(self, org, repo, output=None):
  """Iterate over open issues in batches for a repository.

  Args:
    org: The org that owns the repository.
    repo: The directory for the repository.
    output: The directory to write the results; if not specified results
      are not downloaded.

  Yields:
    One list of issue dicts per page of results.

  Writes the issues along with the first comments to a file in output
  directory.
  """
  client = graphql.GraphQLClient()
  num_issues_per_page = 100

  # TODO(jlewi): Use query variables instead of string formatting.
  query_template = """{{
repository(owner: "{org}", name: "{repo}") {{
  issues(first:{num_issues_per_page}, states: OPEN, {issues_cursor}) {{
    totalCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    edges{{
      node {{
        author {{
          __typename
          ... on User {{
            login
          }}
          ... on Bot{{
            login
          }}
        }}
        id
        title
        body
        url
        state
        labels(first:30, ){{
          totalCount
          edges {{
            node {{
              name
            }}
          }}
        }}
        projectCards(first:30, ){{
          totalCount
          edges {{
            node {{
              id
              project {{
                name
                number
              }}
            }}
          }}
        }}
      }}
    }}
  }}
}}
}}
"""

  num_pages = None
  if output and not os.path.exists(output):
    os.makedirs(output)

  total_issues = None
  has_next_issues_page = True
  # TODO(jlewi): We should persist the cursors to disk so we can resume
  # after errors
  issues_cursor = None
  shard_writer = None

  while has_next_issues_page:
    issues_cursor_text = ""
    if issues_cursor:
      issues_cursor_text = "after:\"{0}\"".format(issues_cursor)
    query = query_template.format(org=org, repo=repo,
                                  num_issues_per_page=num_issues_per_page,
                                  issues_cursor=issues_cursor_text)
    results = client.run_query(query)

    if results.get("errors"):
      message = json.dumps(results.get("errors"))
      # Fix: the original call passed an extra "\n" positional argument for
      # a single %s placeholder, which breaks the log record formatting.
      logging.error("There was a problem issuing the query; errors:\n%s\n",
                    message)
      return

    if not total_issues:
      # First page: learn the total count so we can size the shard writer.
      total_issues = results["data"]["repository"]["issues"]["totalCount"]
      num_pages = int(np.ceil(total_issues / float(num_issues_per_page)))
      logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

    if output and not shard_writer:
      logging.info("initializing the shard writer")
      shard_writer = graphql.ShardWriter(
        num_pages, output, prefix="issues-{0}-{1}".format(org, repo))

    issues = graphql.unpack_and_split_nodes(
      results, ["data", "repository", "issues", "edges"])
    yield issues

    if shard_writer:
      shard_writer.write_shard(issues)

    page_info = results["data"]["repository"]["issues"]["pageInfo"]
    issues_cursor = page_info["endCursor"]
    has_next_issues_page = page_info["hasNextPage"]
def _iter_prs(self, org, repo):
  """Yield every open pull request in the specified repo.

  Args:
    org: The org that owns the repository.
    repo: The directory for the repository.

  Yields:
    One PR dict at a time, in the order returned by the GitHub GraphQL
    API, paging through all results.
  """
  page_size = 25
  query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String,) {
repository(owner: $org, name: $repo) {
  pullRequests(first: $pageSize, after: $issueCursor, states: [OPEN]) {
    totalCount
    pageInfo {
      endCursor
      hasNextPage
    }
    edges {
      node {
        author {
          __typename
          ... on User {
            login
          }
          ... on Bot {
            login
          }
        }
        id
        number
        title
        url
        state
        headRefName
        createdAt
        closedAt
        labels(first: 30) {
          totalCount
          edges {
            node {
              name
            }
          }
        }
      }
    }
  }
}
}
"""

  total = None
  # TODO(jlewi): We should persist the cursors to disk so we can resume
  # after errors
  cursor = None

  while True:
    variables = {
      "org": org,
      "repo": repo,
      "pageSize": page_size,
      "issueCursor": cursor,
    }
    response = self._run_query(query, variables=variables)

    if response.get("errors"):
      # Abort on any GraphQL error.
      message = json.dumps(response.get("errors"))
      logging.error(
        f"There was a problem issuing the query; errors:\n{message}\n"
      )
      return

    if total is None:
      total = response["data"]["repository"]["pullRequests"]["totalCount"]
      logging.info("%s/%s has a total of %s pullRequests", org, repo, total)

    yield from graphql.unpack_and_split_nodes(
      response, ["data", "repository", "pullRequests", "edges"])

    page_info = response["data"]["repository"]["pullRequests"]["pageInfo"]
    if not page_info["hasNextPage"]:
      return
    cursor = page_info["endCursor"]