def _iter_issues(self, org, repo, issue_filter=None, output=None):
    """Iterate over issues in batches for a repository.

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      issue_filter: Used to filter issues to consider based on when they were
        last updated. If not supplied, defaults to issues updated in the last
        24 weeks.
      output: The directory to write the results; if not specified results
        are not downloaded

    Yields:
      issues: A list of dicts, one per issue on the current page of results
        (including labels, project cards, and recent timeline items).

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    # Labels and projects are available via timeline events.
    # However, in timeline events project info (e.g. actual project name)
    # is only in developer preview.
    # The advantage of using labels and projectCards (as opposed to timeline
    # events) is that its much easier to bound the number of items we need
    # to fetch in order to return all labels and projects
    # for timeline items its much more likely the labels and projects we care
    # about will require pagination.
    #
    # TODO(jlewi): We should add a method to fetch all issue timeline items
    # via pagination in the case the number of items exceeds the page size.
    #
    # TODO(jlewi): We need to consider closed issues if we want to compute
    # stats.
    #
    # TODO(jlewi): We should support fetching only OPEN issues; if we are
    # deciding which issues need triage or have been triaged we really only
    # need to look at open isues. Closed Issues will automatically move to
    # the appropriate card in the Kanban board.
    query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String, $filter: IssueFilters) {
  repository(owner: $org, name: $repo) {
    issues(first: $pageSize, filterBy: $filter, after: $issueCursor) {
      totalCount
      pageInfo {
        endCursor
        hasNextPage
      }
      edges {
        node {
          author {
            __typename
            ... on User {
              login
            }
            ... on Bot {
              login
            }
          }
          id
          title
          body
          url
          state
          createdAt
          closedAt
          labels(first: 30) {
            totalCount
            edges {
              node {
                name
              }
            }
          }
          projectCards(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                id
                project {
                  name
                  number
                }
              }
            }
          }
          timelineItems(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                __typename
                ... on AddedToProjectEvent {
                  createdAt
                }
                ... on LabeledEvent {
                  createdAt
                  label {
                    name
                  }
                }
                ... on ClosedEvent {
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }
}
"""

    num_pages = None

    if output and not os.path.exists(output):
        os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None

    # Default to issues updated in the last 24 weeks.
    if not issue_filter:
        start_time = datetime.datetime.now() - datetime.timedelta(weeks=24)
        issue_filter = {
            "since": start_time.isoformat(),
        }

    while has_next_issues_page:
        variables = {
            "org": org,
            "repo": repo,
            "pageSize": num_issues_per_page,
            "issueCursor": issues_cursor,
            "filter": issue_filter,
        }
        results = client.run_query(query, variables=variables)

        if results.get("errors"):
            message = json.dumps(results.get("errors"))
            logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
            return

        # Initialize page accounting (and the shard writer, if downloading)
        # from the first page of results.
        if not total_issues:
            total_issues = results["data"]["repository"]["issues"]["totalCount"]
            num_pages = int(np.ceil(total_issues / float(num_issues_per_page)))
            logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

        if output and not shard_writer:
            logging.info("initializing the shard writer")
            shard_writer = graphql.ShardWriter(
                num_pages, output, prefix="issues-{0}-{1}".format(org, repo))

        issues = graphql.unpack_and_split_nodes(
            results, ["data", "repository", "issues", "edges"])

        yield issues

        if shard_writer:
            shard_writer.write_shard(issues)

        page_info = results["data"]["repository"]["issues"]["pageInfo"]
        issues_cursor = page_info["endCursor"]
        has_next_issues_page = page_info["hasNextPage"]
def _iter_issues(self, org, repo, output=None):
    """Iterate over open issues in batches for a repository.

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded

    Yields:
      issues: A list of dicts, one per issue on the current page of results
        (including labels and project cards).

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    # TODO(jlewi): Use query variables
    # TODO(jlewi):
    # Doubled braces are literal braces in the GraphQL text; single-brace
    # fields (org, repo, num_issues_per_page, issues_cursor) are filled in
    # via str.format below.
    query_template = """{{
repository(owner: "{org}", name: "{repo}") {{
  issues(first:{num_issues_per_page}, states: OPEN, {issues_cursor}) {{
    totalCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    edges{{
      node {{
        author {{
          __typename
          ... on User {{
            login
          }}
          ... on Bot{{
            login
          }}
        }}
        id
        title
        body
        url
        state
        labels(first:30, ){{
          totalCount
          edges {{
            node {{
              name
            }}
          }}
        }}
        projectCards(first:30, ){{
          totalCount
          edges {{
            node {{
              id
              project {{
                name
                number
              }}
            }}
          }}
        }}
      }}
    }}
  }}
}}
}}
"""

    num_pages = None

    if output and not os.path.exists(output):
        os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None

    while has_next_issues_page:
        # On the first page there is no cursor; afterwards resume from the
        # endCursor returned by the previous page.
        issues_cursor_text = ""
        if issues_cursor:
            issues_cursor_text = "after:\"{0}\"".format(issues_cursor)

        query = query_template.format(
            org=org, repo=repo,
            num_issues_per_page=num_issues_per_page,
            issues_cursor=issues_cursor_text)

        results = client.run_query(query)

        if results.get("errors"):
            message = json.dumps(results.get("errors"))
            # Fix: the format string has a single %s placeholder, so pass
            # exactly one argument; the previous extra "\n" argument made
            # the lazy %-formatting fail and the errors were never logged.
            logging.error("There was a problem issuing the query; errors:\n%s",
                          message)
            return

        # Initialize page accounting (and the shard writer, if downloading)
        # from the first page of results.
        if not total_issues:
            total_issues = results["data"]["repository"]["issues"]["totalCount"]
            num_pages = int(np.ceil(total_issues / float(num_issues_per_page)))
            logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

        if output and not shard_writer:
            logging.info("initializing the shard writer")
            shard_writer = graphql.ShardWriter(
                num_pages, output, prefix="issues-{0}-{1}".format(org, repo))

        issues = graphql.unpack_and_split_nodes(
            results, ["data", "repository", "issues", "edges"])

        yield issues

        if shard_writer:
            shard_writer.write_shard(issues)

        page_info = results["data"]["repository"]["issues"]["pageInfo"]
        issues_cursor = page_info["endCursor"]
        has_next_issues_page = page_info["hasNextPage"]