Example #1
  def get_issue(url):
    """Get the data for a specific issue.

    Args:
      url: URL of the issue
    """
    gh_client = graphql.GraphQLClient()
    result = github_util.get_issue(url, gh_client)
    print(json.dumps(result, indent=4, sort_keys=True))

    def __init__(self):

        # A dictionary mapping keys to individual models.
        self._models = {}
        self._load_models()
        self._gh_client = graphql.GraphQLClient()

        if not self._gh_client._headers:
            logging.error(
                "client._headers not set on GraphQLClient. This likely "
                "means no GitHub credentials are loaded, so requests to "
                "the GitHub API will fail")
    def graphql_client(self, org, repo):
        """Return a GitHub GraphQL client for the specified org and repository.

        Args:
          org: The org.
          repo: The repo.
        """
        # TODO(jlewi): Should we cache these?
        ghapp = github_app.GitHubApp.create_from_env()
        token_generator = github_app.GitHubAppTokenGenerator(
            ghapp, f"{org}/{repo}")
        gh_client = graphql.GraphQLClient(headers=token_generator.auth_headers)

        return gh_client
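
The TODO above asks whether clients should be cached. A hypothetical caching variant is sketched below; self._client_cache is an assumed dict initialized in __init__ and is not part of the original code. Note that GitHub App installation tokens expire after roughly an hour, so a production cache would need to refresh headers rather than reuse them indefinitely.

    def graphql_client(self, org, repo):
        # Hypothetical caching variant of the method above.
        key = f"{org}/{repo}"
        if key not in self._client_cache:  # assumed dict from __init__
            ghapp = github_app.GitHubApp.create_from_env()
            token_generator = github_app.GitHubAppTokenGenerator(ghapp, key)
            self._client_cache[key] = graphql.GraphQLClient(
                headers=token_generator.auth_headers)
        # Caveat: installation tokens expire; a real cache would need to
        # refresh the auth headers rather than reuse them indefinitely.
        return self._client_cache[key]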
Example #4
    def __init__(self):
        self._client = graphql.GraphQLClient()

        self._headers = None
        self._token_refresher = None
        # Try various methods to obtain credentials
        try:
            self._token_refresher = (
                github_app.FixedAccessTokenGenerator.from_env())

        except ValueError:
            logging.info(
                "Could not create a FixedAccessTokenGenerator; will try "
                "other methods for obtaining credentials")

        if not self._token_refresher:
            app = github_app.GitHubApp.create_from_env()
            self._token_refresher = github_app.GitHubAppTokenGenerator(
                app, "kubeflow/manifests")
Example #5
    def add_labels_to_issue(self, installation_id, repo_owner, repo_name,
                            issue_num, predictions):
        """
        Add predicted labels to issue using GitHub API.
        Args:
          installation_id: repo installation id
          repo_owner: repo owner
          repo_name: repo name
          issue_num: issue index
          prediction: dict str-> float; dictionary of labels and their predicted
            probability
        """

        # TODO(jlewi): Should we cache the GitHub App? What about token
        # expiration?
        ghapp = github_app.GitHubApp.create_from_env()

        # Load IssueLabelBot config. Look for both organization configuration
        # and repo specific configuration.
        # TODO(jlewi): We should really cache these and use some form of
        # expiration to pick up changes.
        org_config = github_util.get_yaml(owner=repo_owner,
                                          repo=ORG_CONFIG_REPO,
                                          ghapp=ghapp)

        repo_config = github_util.get_yaml(owner=repo_owner,
                                           repo=repo_name,
                                           ghapp=ghapp)

        context = {
            "repo_owner": repo_owner,
            "repo_name": repo_name,
            "issue_num": issue_num
        }
        config = {}

        if org_config:
            config.update(org_config)

        if repo_config:
            config.update(repo_config)

        predictions = self.apply_repo_config(config, repo_owner, repo_name,
                                             predictions, ghapp)

        url = util.build_issue_url(repo_owner, repo_name, issue_num)

        token_generator = github_app.GitHubAppTokenGenerator(
            ghapp, f"{repo_owner}/{repo_name}")
        gh_client = graphql.GraphQLClient(headers=token_generator.auth_headers)
        issue_data = github_util.get_issue(url, gh_client)

        predicted_labels = set(predictions.keys())

        # Remove from label_names any labels which have already been applied
        # or which were explicitly removed.
        label_names = predicted_labels - set(issue_data["labels"])
        label_names = label_names - set(issue_data["removed_labels"])

        already_applied = predicted_labels.intersection(issue_data["labels"])
        removed = predicted_labels.intersection(issue_data["removed_labels"])
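
        # Worked illustration (hypothetical values): with
        #   predictions = {"bug": 0.9, "area/docs": 0.7}
        #   issue_data["labels"] = ["bug"]
        #   issue_data["removed_labels"] = ["area/docs"]
        # label_names ends up empty, already_applied == {"bug"}, and
        # removed == {"area/docs"}, so no new labels are applied.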

        filtered_info = {}
        filtered_info.update(context)
        filtered_info["predicted_labels"] = list(predicted_labels)
        filtered_info["already_applied"] = list(already_applied)
        filtered_info["removed"] = list(removed)

        logging.info("Filtered predictions", extra=filtered_info)
        label_names = list(label_names)

        # Check whether the bot has already commented on this issue.
        already_commented = any(
            login in issue_data["comment_authors"]
            for login in LABEL_BOT_LOGINS)

        if already_commented:
            logging.info("Label bot has already commented on issue.",
                         extra=context)
        else:
            logging.info("Label bot has not commented on issue.",
                         extra=context)

        if not installation_id:
            logging.info("No GitHub App Installation Provided Fetching it")
            installation_id = ghapp.get_installation_id(repo_owner, repo_name)
        install = ghapp.get_installation(installation_id)

        # We are using the github3 library to add comments.
        # TODO(jlewi): Use GraphQL instead so we can use a single library.
        issue = install.issue(repo_owner, repo_name, issue_num)

        message = None
        if label_names:
            # create message
            # Create a markdown table with probabilities.
            rows = [
                "| Label  | Probability |", "| ------------- | ------------- |"
            ]

            for label in label_names:
                rows.append("| {} | {:.2f} |".format(label, predictions[label]))

            lines = [
                "Issue-Label Bot is automatically applying the labels:", ""
            ]
            lines.extend(rows)
            lines.append("")
            lines.append(
                "Please mark this comment with :thumbsup: or :thumbsdown: "
                "to give our bot feedback!")
            lines.append(
                "Links: [app homepage](https://github.com/marketplace/issue-label-bot), "
                "[dashboard]({app_url}data/{repo_owner}/{repo_name}) and "
                "[code](https://github.com/hamelsmu/MLapp) for this bot.".
                format(app_url=self.app_url,
                       repo_owner=repo_owner,
                       repo_name=repo_name))
            message = "\n".join(lines)
            # label the issue using the GitHub api
            issue.add_labels(*label_names)
            context["labels"] = label_names
            logging.info(
                f'Added `{"`, `".join(label_names)}` to issue #{issue_num}',
                extra=context)
        else:
            # We don't want to spam an issue with comments. So once the label
            # bot has commented on an issue we won't chime in again to report
            # that we don't have any labels to apply.
            if not already_commented:
                # TODO(jlewi): Should we include top predictions for area and
                # platform? Maybe we should include top predictions for
                # all areas? The problem is the model only returns predictions
                # above the threshold.
                message = """Issue Label Bot is not confident enough to auto-label this issue.
                See [dashboard]({app_url}data/{repo_owner}/{repo_name}) for more details.
                """.format(app_url=self.app_url,
                           repo_owner=repo_owner,
                           repo_name=repo_name)
                logging.warning(
                    f'Not confident enough to label this issue: # {issue_num}',
                    extra=context)

        # Post the comment via the GitHub API.
        if message:
            issue.create_comment(message)
Example #6
  def _iter_issues(self, org, repo, issue_filter=None, output=None):
    """Iterate over issues in batches for a repository

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded
      issue_filter: Used to filter issues to consider based on when they were
        last updated

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    # Labels and projects are available via timeline events.
    # However, in timeline events project info (e.g. the actual project name)
    # is only available in developer preview.
    # The advantage of using labels and projectCards (as opposed to timeline
    # events) is that it's much easier to bound the number of items we need
    # to fetch in order to return all labels and projects.
    # For timeline items it's much more likely that the labels and projects
    # we care about will require pagination.
    #
    # TODO(jlewi): We should add a method to fetch all issue timeline items
    # via pagination in the case the number of items exceeds the page size.
    #
    # TODO(jlewi): We need to consider closed issues if we want to compute
    # stats.
    #
    # TODO(jlewi): We should support fetching only OPEN issues; if we are
    # deciding which issues need triage or have been triaged we really only
    # need to look at open issues. Closed issues will automatically move to
    # the appropriate card in the Kanban board.
    query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String, $filter: IssueFilters) {
  repository(owner: $org, name: $repo) {
    issues(first: $pageSize, filterBy: $filter, after: $issueCursor) {
      totalCount
      pageInfo {
        endCursor
        hasNextPage
      }
      edges {
        node {
          author {
            __typename
            ... on User {
              login
            }
            ... on Bot {
              login
            }
          }
          id
          title
          body
          url
          state
          createdAt
          closedAt
          labels(first: 30) {
            totalCount
            edges {
              node {
                name
              }
            }
          }
          projectCards(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                id
                project {
                  name
                  number
                }
              }
            }
          }
          timelineItems(first: 30) {
            totalCount
            pageInfo {
              endCursor
              hasNextPage
            }
            edges {
              node {
                __typename
                ... on AddedToProjectEvent {
                  createdAt
                }
                ... on LabeledEvent {
                  createdAt
                  label {
                    name
                  }
                }
                ... on ClosedEvent {
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }
}
"""

    num_pages = None
    if output and not os.path.exists(output):
      os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None

    if not issue_filter:
      start_time = datetime.datetime.now() - datetime.timedelta(weeks=24)
      issue_filter = {
        "since": start_time.isoformat(),
      }

    while has_next_issues_page:

      variables = {
        "org": org,
        "repo": repo,
        "pageSize": num_issues_per_page,
        "issueCursor": issues_cursor,
        "filter": issue_filter,
      }
      results = client.run_query(query, variables=variables)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error(f"There was a problem issuing the query; errors:\n{message}\n")
        return

      if not total_issues:
        total_issues = results["data"]["repository"]["issues"]["totalCount"]
        num_pages = int(np.ceil(total_issues/float(num_issues_per_page)))
        logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

      if output and not shard_writer:
        logging.info("initializing the shard writer")
        shard_writer = graphql.ShardWriter(num_pages, output,
                                           prefix="issues-{0}-{1}".format(org, repo))

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "repository", "issues", "edges"])

      yield issues

      if shard_writer:
        shard_writer.write_shard(issues)

      page_info = results["data"]["repository"]["issues"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]
Example #7
  def client(self):
    if not self._client:
      self._client = graphql.GraphQLClient()

    return self._client
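
The accessor above builds the client lazily on first call and reuses it afterwards; a usage sketch (the owning class, here called Fetcher, is hypothetical and assumed to set self._client = None in __init__):

  fetcher = Fetcher()  # hypothetical owning class
  assert fetcher.client() is fetcher.client()  # one shared GraphQLClient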
Example #8
  def _iter_issues(self, org, repo, output=None):
    """Iterate over issues in batches for a repository

    Args:
      org: The org that owns the repository
      repo: The directory for the repository
      output: The directory to write the results; if not specified results
        are not downloaded

    Writes the issues along with the first comments to a file in output
    directory.
    """
    client = graphql.GraphQLClient()

    num_issues_per_page = 100

    # TODO(jlewi): Use query variables instead of string formatting.
    query_template = """{{
repository(owner: "{org}", name: "{repo}") {{
  issues(first:{num_issues_per_page}, states: OPEN, {issues_cursor}) {{
    totalCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    edges{{
      node {{
        author {{
          __typename
                ... on User {{
                  login
                }}

                ... on Bot{{
                  login
                }}
        }}
        id
        title
        body
        url
        state
        labels(first:30, ){{
          totalCount
          edges {{
            node {{
              name
            }}
          }}
        }}
        projectCards(first:30, ){{
          totalCount
          edges {{
            node {{
              id
              project {{
                name
                number
              }}
            }}
          }}
        }}
      }}
    }}
  }}
}}
}}
"""


    num_pages = None
    if output and not os.path.exists(output):
      os.makedirs(output)

    total_issues = None
    has_next_issues_page = True
    # TODO(jlewi): We should persist the cursors to disk so we can resume
    # after errors
    issues_cursor = None
    shard_writer = None
    while has_next_issues_page:
      issues_cursor_text = ""
      if issues_cursor:
        issues_cursor_text = "after:\"{0}\"".format(issues_cursor)
      query = query_template.format(org=org, repo=repo,
                                    num_issues_per_page=num_issues_per_page,
                                    issues_cursor=issues_cursor_text)
      results = client.run_query(query)

      if results.get("errors"):
        message = json.dumps(results.get("errors"))
        logging.error("There was a problem issuing the query; errors:\n%s",
                      "\n", message)
        return

      if not total_issues:
        total_issues = results["data"]["repository"]["issues"]["totalCount"]
        num_pages = int(np.ceil(total_issues/float(num_issues_per_page)))
        logging.info("%s/%s has a total of %s issues", org, repo, total_issues)

      if output and not shard_writer:
        logging.info("initializing the shard writer")
        shard_writer = graphql.ShardWriter(num_pages, output,
                                           prefix="issues-{0}-{1}".format(org, repo))

      issues = graphql.unpack_and_split_nodes(
        results, ["data", "repository", "issues", "edges"])

      yield issues

      if shard_writer:
        shard_writer.write_shard(issues)

      page_info = results["data"]["repository"]["issues"]["pageInfo"]
      issues_cursor = page_info["endCursor"]
      has_next_issues_page = page_info["hasNextPage"]
Example #9
    def fetch_issues(self, org, repo, output):
        """Fetch issues for a repository

    Args:
      org: The org that owns the repository
      repo: The name of the repository
      output: The directory to write the results

    Writes the issues along with the first comments to a file in output
    directory.
    """
        client = graphql.GraphQLClient()

        num_issues_per_page = 100
        query_template = """{{
repository(owner: "{org}", name: "{repo}") {{
  issues(first:{num_issues_per_page} {issues_cursor}) {{
    totalCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    edges{{
      node {{
        author {{
          __typename
                ... on User {{
                  login
                }}

                ... on Bot{{
                  login
                }}
        }}
        title
        body
        comments(first:20, ){{
          totalCount
          edges {{
            node {{
              author {{
          __typename
                ... on User {{
                  login
                }}

                ... on Bot{{
                  login
                }}
        			}}
              body
              createdAt
            }}
          }}
        }}
      }}
    }}
  }}
}}
}}
"""

        shard = 0
        num_pages = None
        if not os.path.exists(output):
            os.makedirs(output)

        total_issues = None
        has_next_issues_page = True
        # TODO(jlewi): We should persist the cursors to disk so we can resume
        # after errors
        issues_cursor = None
        while has_next_issues_page:
            issues_cursor_text = ""
            if issues_cursor:
                issues_cursor_text = "after:\"{0}\"".format(issues_cursor)
            query = query_template.format(
                org=org,
                repo=repo,
                num_issues_per_page=num_issues_per_page,
                issues_cursor=issues_cursor_text)
            results = client.run_query(query)

            if results.get("errors"):
                logging.error(
                    "There was a problem issuing the query; errors:\n%s",
                    "\n".join(results.get("errors")))
                return

            if not total_issues:
                total_issues = results["data"]["repository"]["issues"][
                    "totalCount"]
                num_pages = int(
                    np.ceil(total_issues / float(num_issues_per_page)))
                logging.info("%s/%s has a total of %s issues", org, repo,
                             total_issues)

            shard_file = os.path.join(
                output, "issues-{0}-{1}-{2:03d}-of-{3:03d}.json".format(
                    org, repo, shard, num_pages))

            issues = process_issue_results(results)
            with open(shard_file, "w") as hf:
                for i in issues:
                    json.dump(i, hf)
                    hf.write("\n")
                logging.info("Wrote shard %s to %s", shard, shard_file)
            shard += 1

            page_info = results["data"]["repository"]["issues"]["pageInfo"]
            issues_cursor = page_info["endCursor"]
            has_next_issues_page = page_info["hasNextPage"]