def from_issue(cls, issue):
  """Build a TriageInfo summarizing the triage state of the supplied issue.

  Inspects the issue's labels and project cards to decide which triage
  requirements (kind, priority, project, area) are still missing, and
  remembers the project card that places the issue on the triage project.
  """
  info = TriageInfo()
  info.issue = issue

  labels = graphql.unpack_and_split_nodes(issue, ["labels", "edges"])
  cards = graphql.unpack_and_split_nodes(issue, ["projectCards", "edges"])

  for label in labels:
    label_name = label["name"]

    if label_name in ALLOWED_KINDS:
      info.missing_kind = False

    if label_name in ALLOWED_PRIORITY:
      info.missing_priority = False
      # Some priorities require the issue to be assigned to a project;
      # for the others (or when any project card exists) the project
      # requirement is satisfied.
      if label_name not in REQUIRES_PROJECT or cards:
        info.missing_project = False

    if label_name.startswith(("area", "community")):
      info.missing_area = False

  # Record the card that puts this issue on the triage project, if any.
  for card in cards:
    if card.get("project").get("name") == TRIAGE_PROJECT:
      info.triage_project_card = card
      break

  return info
Example #2
0
  def from_issue(cls, issue):
    """Build a TriageInfo from the supplied issue, including event timestamps.

    Reads labels, project cards and timeline events to record whether a
    project is required and when the kind/area/priority labels and the
    project assignment first happened.
    """
    info = TriageInfo()
    info.issue = issue

    labels = graphql.unpack_and_split_nodes(issue, ["labels", "edges"])
    cards = graphql.unpack_and_split_nodes(issue, ["projectCards", "edges"])
    events = graphql.unpack_and_split_nodes(issue,
                                            ["timelineItems", "edges"])

    for label in labels:
      label_name = label["name"]
      if label_name in ALLOWED_PRIORITY:
        info.requires_project = label_name in REQUIRES_PROJECT

    # Record the card that puts this issue on the triage project, if any.
    for card in cards:
      if card.get("project").get("name") == TRIAGE_PROJECT:
        info.triage_project_card = card
        break

    # TODO(jlewi): Could we potentially miss some events since we aren't
    # paginating through all events for an issue? This should no longer
    # be an issue because _process_issue will call _get_issue and paginate
    # through all results.
    for event in events:
      if "createdAt" not in event:
        continue

      when = dateutil_parser.parse(event.get("createdAt"))
      type_name = event.get("__typename")

      if type_name == "LabeledEvent":
        label_name = event.get("label").get("name")

        # Only the earliest occurrence of each label category is kept.
        if label_name.startswith("kind") and not info.kind_time:
          info.kind_time = when

        if ((label_name.startswith("area") or
             label_name.startswith("platform")) and not info.area_time):
          info.area_time = when

        if label_name in ALLOWED_PRIORITY and not info.priority_time:
          info.priority_time = when

      if type_name == "AddedToProjectEvent" and not info.project_time:
        info.project_time = when

    if issue.get("closedAt"):
      info.closed_at = dateutil_parser.parse(issue.get("closedAt"))

    return info
Example #3
0
  def update_kanban_board(self):
    """Checks if any issues in the needs triage board can be removed.
    """
    query = """query getIssues($issueCursor: String) {
  search(type: ISSUE, query: "is:open is:issue org:kubeflow project:kubeflow/26", first: 100, after: $issueCursor) {
    issueCount
    pageInfo {
      endCursor
      hasNextPage
    }
    edges {
      node {
        __typename
        ... on Issue {
          author {
            __typename
            ... on User {
              login
            }
            ... on Bot {
              login
            }
          }
          id
          title
          body
          url
          state
          createdAt
          closedAt
          labels(first: 30) {
            totalCount
            edges {
              node {
                name
              }
            }
          }
          projectCards(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                id
                project {
                  name
                  number
                }
              }
            }
          }
          timelineItems(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                __typename
                ... on AddedToProjectEvent {
                  createdAt
                }
                ... on LabeledEvent {
                  createdAt
                  label {
                    name
                  }
                }
                ... on ClosedEvent {
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }
}
"""
    issues_cursor = None
    has_next_issues_page = True
    while has_next_issues_page:

      variables = {
        "issueCursor": issues_cursor,
      }
      results = self.client.run_query(query, variables=variables)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
        return

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "search", "edges"])

      for i in issues:
        self._process_issue(i)

      page_info = results["data"]["search"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]
Example #4
0
  def _iter_issues(self, org, repo, issue_filter=None, output=None):
    """Iterate over issues in batches for a repository

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded
      issue_filter: Used to filter issues to consider based on when they were
        last updated

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    if not issue_filter:
      today = datetime.datetime.now()
      today = datetime.datetime(year=today.year, month=today.month, day=today.day)

      start_time = today - datetime.timedelta(days=60)

    # Labels and projects are available via timeline events.
    # However, in timeline events project info (e.g. actual project name)
    # is only in developer preview.
    # The advantage of using labels and projectCards (as opposed to timeline
    # events) is that its much easier to bound the number of items we need
    # to fetch in order to return all labels and projects
    # for timeline items its much more likely the labels and projects we care
    # about will require pagination.
    #
    # TODO(jlewi): We should add a method to fetch all issue timeline items
    # via pagination in the case the number of items exceeds the page size.
    #
    # TODO(jlewi): We need to consider closed issues if we want to compute
    # stats.
    #
    # TODO(jlewi): We should support fetching only OPEN issues; if we are
    # deciding which issues need triage or have been triaged we really only
    # need to look at open isues. Closed Issues will automatically move to
    # the appropriate card in the Kanban board.
    query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String, $filter: IssueFilters) {
  repository(owner: $org, name: $repo) {
    issues(first: $pageSize, filterBy: $filter, after: $issueCursor) {
      totalCount
      pageInfo {
        endCursor
        hasNextPage
      }
      edges {
        node {
          author {
            __typename
            ... on User {
              login
            }
            ... on Bot {
              login
            }
          }
          id
          title
          body
          url
          state
          createdAt
          closedAt
          labels(first: 30) {
            totalCount
            edges {
              node {
                name
              }
            }
          }
          projectCards(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                id
                project {
                  name
                  number
                }
              }
            }
          }
          timelineItems(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                __typename
                ... on AddedToProjectEvent {
                  createdAt

                }
                ... on LabeledEvent {
                  createdAt
                  label {
                    name
                  }
                }
                ... on ClosedEvent {
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }
}
"""

    shard = 0
    num_pages = None
    if output and not os.path.exists(output):
      os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None

    if not issue_filter:
      start_time = datetime.datetime.now() - datetime.timedelta(weeks=24)
      issue_filter = {
        "since": start_time.isoformat(),
      }

    while has_next_issues_page:

      variables = {
        "org": org,
        "repo": repo,
        "pageSize": num_issues_per_page,
        "issueCursor": issues_cursor,
        "filter": issue_filter,
      }
      results = client.run_query(query, variables=variables)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
        return

      if not total_issues:
        total_issues = results["data"]["repository"]["issues"]["totalCount"]
        num_pages = int(np.ceil(total_issues/float(num_issues_per_page)))
        logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

      if output and not shard_writer:
        logging.info("initializing the shard writer")
        shard_writer = graphql.ShardWriter(num_pages, output,
                                           prefix="issues-{0}-{1}".format(org, repo))

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "repository", "issues", "edges"])

      yield issues

      if shard_writer:
        shard_writer.write_shard(issues)

      page_info = results["data"]["repository"]["issues"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]
  def _iter_issues(self, org, repo, output=None):
    """Iterate over issues in batches for a repository

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    # TODO(jlewi):Use query variables
    # TODO(jlewi):
    query_template = """{{
repository(owner: "{org}", name: "{repo}") {{
  issues(first:{num_issues_per_page}, states: OPEN, {issues_cursor}) {{
    totalCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    edges{{
      node {{
        author {{
          __typename
                ... on User {{
                  login
                }}

                ... on Bot{{
                  login
                }}
        }}
        id
        title
        body
        url
        state
        labels(first:30, ){{
          totalCount
          edges {{
            node {{
              name
            }}
          }}
        }}
        projectCards(first:30, ){{
          totalCount
          edges {{
            node {{
              id
              project {{
                name
                number
              }}
            }}
          }}
        }}
      }}
    }}
  }}
}}
}}
"""


    shard = 0
    num_pages = None
    if output and not os.path.exists(output):
      os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None
    while has_next_issues_page:
      issues_cursor_text = ""
      if issues_cursor:
        issues_cursor_text = "after:\"{0}\"".format(issues_cursor)
      query = query_template.format(org=org, repo=repo,
                                    num_issues_per_page=num_issues_per_page,
                                    issues_cursor=issues_cursor_text)
      results = client.run_query(query)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error("There was a problem issuing the query; errors:\n%s",
                      "\n", message)
        return

      if not total_issues:
        total_issues = results["data"]["repository"]["issues"]["totalCount"]
        num_pages = int(np.ceil(total_issues/float(num_issues_per_page)))
        logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

      if output and not shard_writer:
        logging.info("initializing the shard writer")
        shard_writer = graphql.ShardWriter(num_pages, output,
                                           prefix="issues-{0}-{1}".format(org, repo))

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "repository", "issues", "edges"])

      yield issues

      if shard_writer:
        shard_writer.write_shard(issues)

      page_info = results["data"]["repository"]["issues"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]
    def _iter_prs(self, org, repo):
        """Iterate over open PRs in the specified repo.

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      issue_filter: Used to filter issues to consider based on when they were
        last updated

    Writes the issues along with the first comments to a file in output
    directory.
    """
        num_prs_per_page = 25
        query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String,) {
  repository(owner: $org, name: $repo) {
    pullRequests(first: $pageSize, after: $issueCursor, states: [OPEN]) {
      totalCount
      pageInfo {
        endCursor
        hasNextPage
      }
      edges {
        node {
          author {
            __typename
            ... on User {
              login
            }
            ... on Bot {
              login
            }
          }
          id
          number
          title
          url
          state
          headRefName
          createdAt
          closedAt
          labels(first: 30) {
            totalCount
            edges {
              node {
                name
              }
            }
          }
        }
      }
    }
  }
}
"""

        total_prs = None
        has_next_prs_page = True
        # TODO(jlewi): We should persist the cursors to disk so we can resume
        # after errors
        prs_cursor = None

        while has_next_prs_page:

            variables = {
                "org": org,
                "repo": repo,
                "pageSize": num_prs_per_page,
                "issueCursor": prs_cursor,
            }
            results = self._run_query(query, variables=variables)

            if results.get("errors"):
                message = json.dumps(results.get("errors"))
                logging.error(
                    f"There was a problem issuing the query; errors:\n{message}\n"
                )
                return

            if not total_prs:
                total_prs = results["data"]["repository"]["pullRequests"][
                    "totalCount"]
                logging.info("%s/%s has a total of %s pullRequests", org, repo,
                             total_prs)

            prs = graphql.unpack_and_split_nodes(
                results, ["data", "repository", "pullRequests", "edges"])
            for pr in prs:
                yield pr

            page_info = results["data"]["repository"]["pullRequests"][
                "pageInfo"]
            prs_cursor = page_info["endCursor"]
            has_next_prs_page = page_info["hasNextPage"]