Ejemplo n.º 1
0
  def _iter_issues(self, org, repo, issue_filter=None, output=None):
    """Iterate over issues in batches for a repository

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded
      issue_filter: Used to filter issues to consider based on when they were
        last updated

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    if not issue_filter:
      today = datetime.datetime.now()
      today = datetime.datetime(year=today.year, month=today.month, day=today.day)

      start_time = today - datetime.timedelta(days=60)

    # Labels and projects are available via timeline events.
    # However, in timeline events project info (e.g. actual project name)
    # is only in developer preview.
    # The advantage of using labels and projectCards (as opposed to timeline
    # events) is that its much easier to bound the number of items we need
    # to fetch in order to return all labels and projects
    # for timeline items its much more likely the labels and projects we care
    # about will require pagination.
    #
    # TODO(jlewi): We should add a method to fetch all issue timeline items
    # via pagination in the case the number of items exceeds the page size.
    #
    # TODO(jlewi): We need to consider closed issues if we want to compute
    # stats.
    #
    # TODO(jlewi): We should support fetching only OPEN issues; if we are
    # deciding which issues need triage or have been triaged we really only
    # need to look at open isues. Closed Issues will automatically move to
    # the appropriate card in the Kanban board.
    query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String, $filter: IssueFilters) {
  repository(owner: $org, name: $repo) {
    issues(first: $pageSize, filterBy: $filter, after: $issueCursor) {
      totalCount
      pageInfo {
        endCursor
        hasNextPage
      }
      edges {
        node {
          author {
            __typename
            ... on User {
              login
            }
            ... on Bot {
              login
            }
          }
          id
          title
          body
          url
          state
          createdAt
          closedAt
          labels(first: 30) {
            totalCount
            edges {
              node {
                name
              }
            }
          }
          projectCards(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                id
                project {
                  name
                  number
                }
              }
            }
          }
          timelineItems(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                __typename
                ... on AddedToProjectEvent {
                  createdAt

                }
                ... on LabeledEvent {
                  createdAt
                  label {
                    name
                  }
                }
                ... on ClosedEvent {
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }
}
"""

    shard = 0
    num_pages = None
    if output and not os.path.exists(output):
      os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None

    if not issue_filter:
      start_time = datetime.datetime.now() - datetime.timedelta(weeks=24)
      issue_filter = {
        "since": start_time.isoformat(),
      }

    while has_next_issues_page:

      variables = {
        "org": org,
        "repo": repo,
        "pageSize": num_issues_per_page,
        "issueCursor": issues_cursor,
        "filter": issue_filter,
      }
      results = client.run_query(query, variables=variables)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
        return

      if not total_issues:
        total_issues = results["data"]["repository"]["issues"]["totalCount"]
        num_pages = int(np.ceil(total_issues/float(num_issues_per_page)))
        logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

      if output and not shard_writer:
        logging.info("initializing the shard writer")
        shard_writer = graphql.ShardWriter(num_pages, output,
                                           prefix="issues-{0}-{1}".format(org, repo))

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "repository", "issues", "edges"])

      yield issues

      if shard_writer:
        shard_writer.write_shard(issues)

      page_info = results["data"]["repository"]["issues"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]
Ejemplo n.º 2
0
  def _iter_issues(self, org, repo, output=None):
    """Iterate over issues in batches for a repository

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    # TODO(jlewi):Use query variables
    # TODO(jlewi):
    query_template = """{{
repository(owner: "{org}", name: "{repo}") {{
  issues(first:{num_issues_per_page}, states: OPEN, {issues_cursor}) {{
    totalCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    edges{{
      node {{
        author {{
          __typename
                ... on User {{
                  login
                }}

                ... on Bot{{
                  login
                }}
        }}
        id
        title
        body
        url
        state
        labels(first:30, ){{
          totalCount
          edges {{
            node {{
              name
            }}
          }}
        }}
        projectCards(first:30, ){{
          totalCount
          edges {{
            node {{
              id
              project {{
                name
                number
              }}
            }}
          }}
        }}
      }}
    }}
  }}
}}
}}
"""


    shard = 0
    num_pages = None
    if output and not os.path.exists(output):
      os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None
    while has_next_issues_page:
      issues_cursor_text = ""
      if issues_cursor:
        issues_cursor_text = "after:\"{0}\"".format(issues_cursor)
      query = query_template.format(org=org, repo=repo,
                                    num_issues_per_page=num_issues_per_page,
                                    issues_cursor=issues_cursor_text)
      results = client.run_query(query)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error("There was a problem issuing the query; errors:\n%s",
                      "\n", message)
        return

      if not total_issues:
        total_issues = results["data"]["repository"]["issues"]["totalCount"]
        num_pages = int(np.ceil(total_issues/float(num_issues_per_page)))
        logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

      if output and not shard_writer:
        logging.info("initializing the shard writer")
        shard_writer = graphql.ShardWriter(num_pages, output,
                                           prefix="issues-{0}-{1}".format(org, repo))

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "repository", "issues", "edges"])

      yield issues

      if shard_writer:
        shard_writer.write_shard(issues)

      page_info = results["data"]["repository"]["issues"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]