def main(argv):
    """Summarize external, merged pull requests by author organization.

    Collects closed PRs from every tracked repo, keeps only external+merged
    ones (optionally bounded by --since/--start/--end), prints a per-org
    count, then lists the PRs in the 'other' and 'unsigned' buckets.
    """
    parser = argparse.ArgumentParser(description="Summarize pull requests by organization.")
    parser.add_argument("--since", metavar="DAYS", type=int,
        help="Only consider pull requests closed in the past DAYS days"
    )
    parser.add_argument("--start", type=date_arg,
        help="Date to start collecting, format is flexible: "
        "20141225, Dec/25/2014, 2014-12-25, etc"
    )
    parser.add_argument("--end", type=date_arg,
        help="Date to end collecting, format is flexible: "
        "25/Dec/2014, 12/25/2014, 2014-12-25, etc"
    )
    args = parser.parse_args(argv[1:])

    since = None
    if args.since:
        since = date.today() - timedelta(days=args.since)
    if args.start:
        if since is not None:
            raise Exception("Can't use --since and --start")
        since = args.start

    repos = [r for r in Repo.from_yaml() if r.track_pulls]
    by_org = collections.defaultdict(list)
    for repo in repos:
        for pull in get_pulls(repo.name, state="closed", pull_details="list", org=True, since=since):
            # We only want external pull requests.
            if pull['intext'] != "external":
                continue
            # We only want merged pull requests.
            if pull['combinedstate'] != "merged":
                continue
            if args.end is not None:
                # We don't want to count things merged after our end date.
                # Strip the timezone: GitHub timestamps parse tz-aware, and
                # comparing aware vs the naive --end date raises TypeError.
                # (The rest of this file compares naive datetimes too.)
                merged = dateutil.parser.parse(pull['pull.merged_at']).replace(tzinfo=None)
                if merged >= args.end:
                    continue

            by_org[pull['org']].append(pull)

    # Largest orgs first.
    keys = sorted(by_org, key=lambda k: len(by_org[k]), reverse=True)
    for key in keys:
        print("{}: {}".format(key, len(by_org[key])))

    fmt = "{number:5d} {user.login:>17s} {title}"
    for i, pull in enumerate(by_org['other']):
        if i == 0:
            print("\n'Other' pull requests:")
        print(pull.format(fmt))

    for i, pull in enumerate(by_org['unsigned']):
        if i == 0:
            print("\nUnsigned authors:")
        print(pull.format(fmt))
def insert_pulls(labels=None, state="open", since=None, org=False):
    """Fetch edx-platform pull requests and store each one in the prs.prs
    MongoDB collection.
    """
    collection = MongoClient().prs.prs
    for pull in get_pulls("edx/edx-platform", labels, state, since, org):
        collection.insert(pull)
def show_pulls(labels=None, show_comments=False, state="open", since=None, org=False):
    """Print edx-platform pull requests grouped by author organization.

    `labels`: filter PRs by these labels (all shown if None).
    `show_comments`: if True, also print the last five comments on each PR.
    `state`/`since`/`org`: passed through to `get_pulls`.

    Prints a category header each time the org changes, then each PR in
    ISSUE_FMT, and finishes with a total count.
    """
    issues = get_pulls("edx/edx-platform", labels, state, since, org, pull_details="all")

    category = None
    num = 0  # counted explicitly so an empty result doesn't raise NameError
    for issue in issues:
        if issue.get("org") != category:
            # new category! print category header
            category = issue["org"]
            print("-- {category} ----".format(category=category))

        print(issue.format(ISSUE_FMT))
        num += 1

        if show_comments:
            # Fetch the comments newest-first, take five, print oldest-first.
            comments_url = URLObject(issue['comments_url'])
            comments_url = comments_url.set_query_param("sort", "created")
            comments_url = comments_url.set_query_param("direction", "desc")
            comments = paginated_get(comments_url)
            last_five_comments = reversed(more_itertools.take(5, comments))
            for comment in last_five_comments:
                print(comment.format(COMMENT_FMT))

    print()
    print("{num} pull requests".format(num=num))
def show_pulls(labels=None, show_comments=False, state="open", since=None, org=False):
    """Print edx-platform pull requests grouped by author organization.

    Same as the pull_details="all" variant, but loads pull details lazily
    per issue via `load_pull_details()`.

    `labels`: filter PRs by these labels (all shown if None).
    `show_comments`: if True, also print the last five comments on each PR.
    `state`/`since`/`org`: passed through to `get_pulls`.
    """
    issues = get_pulls("edx/edx-platform", labels, state, since, org)

    category = None
    num = 0  # counted explicitly so an empty result doesn't raise NameError
    for issue in issues:
        issue.load_pull_details()
        if issue.get("org") != category:
            # new category! print category header
            category = issue["org"]
            print("-- {category} ----".format(category=category))

        print(issue.format(ISSUE_FMT))
        num += 1

        if show_comments:
            # Fetch the comments newest-first, take five, print oldest-first.
            comments_url = URLObject(issue['comments_url'])
            comments_url = comments_url.set_query_param("sort", "created")
            comments_url = comments_url.set_query_param("direction", "desc")
            comments = paginated_get(comments_url)
            last_five_comments = reversed(more_itertools.take(5, comments))
            for comment in last_five_comments:
                print(comment.format(COMMENT_FMT))

    print()
    print("{num} pull requests".format(num=num))
def get_bucket_data(buckets, repo_name, date_bucket_fn):
    """Tally one repo's pull requests into `buckets`.

    For every PR, increments buckets[bucket]["opened"][intext] keyed by the
    creation date; merged PRs also increment ...["merged"][intext] keyed by
    the merge date. `date_bucket_fn` maps a datetime to a bucket key.
    """
    for pull in get_pulls(repo_name, state="all", pull_details="list", org=True):
        opened_on = dateutil.parser.parse(pull['created_at'])
        buckets[date_bucket_fn(opened_on)]["opened"][pull["intext"]] += 1

        if pull['combinedstate'] == "merged":
            merged_on = dateutil.parser.parse(pull['pull.merged_at'])
            buckets[date_bucket_fn(merged_on)]["merged"][pull["intext"]] += 1
def show_pulls(labels=None, show_comments=False, state="open", since=None, org=False):
    """Print per-month counts of opened and merged edx-platform PRs as CSV
    lines of the form "YYYY-MM,opened,merged".
    """
    monthly = collections.defaultdict(lambda: {'opened': 0, 'merged': 0})
    for issue in get_pulls("edx/edx-platform", labels, state, since, org, pull_details="all"):
        monthly[yearmonth(issue['created_at'])]['opened'] += 1
        if issue['pull.merged']:
            monthly[yearmonth(issue['pull.merged_at'])]['merged'] += 1

    # NOTE(review): raw dict dump before the CSV output — looks like leftover
    # debugging; confirm it is wanted.
    print(monthly)
    for ym, counts in sorted(monthly.items()):
        print("{ym},{data[opened]},{data[merged]}".format(ym=ym, data=counts))
def one_repo(self, repo):
    """Collect this repo's open pull requests into the report.

    A PR is added when it carries a team (blocking) label, or, failing
    that, when it is external.
    """
    for issue in get_pulls(repo.name, state="open", org=True, pull_details="list"):
        issue["repo"] = repo.nick
        has_team_label = any(label in self.team_names for label in issue['labels'])
        if has_team_label:
            self.add_pull(issue)
        elif issue['intext'] == "external":
            # Didn't find a blocking label, include it if external.
            self.add_pull(issue)
def get_comment_data(repo, since, internal):
    """Print CSV rows for external PRs that got internal comments since `since`.

    For each external (open, or closed-since-`since`) pull request on `repo`
    that received at least one comment from an internal user (per the
    `internal(login)` predicate) on/after `since`, prints one CSV row:
    id, author, changed_files, additions, deletions, "title", url,
    team flags (via check_intersection), then a colon-joined commenter list.

    `since`: naive datetime lower bound for comment creation times.
    `internal`: callable taking a login and returning True for edX staff.
    """
    # Hard-coded team rosters used to flag which teams commented.
    ora = set(["ormsbee", "wedaly", "stephensanchez"])
    lms = set(["sarina", "cpennington", "dianakhuang", "davestgermain","flowerhack"])
    cms = set(["cahrens", "andy-armstrong", "dmitchell","nasthagiri"])
    analytics = set(["rocha","brianhw", "mulby"])
    forums = set(["gwprice","jimabramson"])

    pull_kwargs = dict(org=True, pull_details="get")
    open_issues = get_pulls(repo, state="open", **pull_kwargs)
    closed_issues = get_pulls(repo, state="closed", since=since, **pull_kwargs)

    for pull in itertools.chain(open_issues, closed_issues):
        #print("-"*80)
        #pprint(pull.obj)
        if pull['intext'] == "internal":
            continue
        users = set()
        for comment in paginated_get(pull['comments_url']):
            # Drop tzinfo so the comparison against the naive `since` works.
            created_at = iso8601.parse_date(comment["created_at"]).replace(tzinfo=None)
            commenter = comment["user"]["login"]
            if created_at >= since and internal(commenter):
                if not users:
                    # First qualifying comment: start this PR's CSV row.
                    print(pull.format("{id},{user.login},{pull.changed_files},{pull.additions},{pull.deletions}"), end="")
                    print(pull.format(',"{title}"'), end="")
                    print(pull.format(",{url}"), end="")
                users.add(commenter)
        if users:
            # Finish the row: team flags, then the sorted commenter list.
            check_intersection(users, lms, "LMS")
            check_intersection(users, ora, "ORA")
            check_intersection(users, cms, "CMS")
            check_intersection(users, analytics, "ANALYTICS")
            check_intersection(users, forums, "FORUMS")
            print(",", end="")
            print(":".join("{}".format(user) for user in sorted(users)), end="")
            print()
def get_duration_data(durations, owner_repo="edx/edx-platform", since=None):
    """
    Update `durations`, a dict of dict of lists of pull requests.

    `durations` has four lists of data, where each list contains pull requests:

      internal open pull requests (all)
      external open pull requests (all)
      internal closed pull requests (since the `since` value)
      external closed pull requests (since the `since` value)

    These lists are organized into a dictionary that categorizes the lists
    by position and state.
    """
    # Generator expressions instead of itertools.izip: izip is Python-2-only,
    # and these stay lazy on both Python 2 and 3.
    open_issues_generator = (
        (issue, "open")
        for issue in get_pulls(owner_repo, state="open", org=True)
    )
    closed_issues_generator = (
        (issue, "closed")
        for issue in get_pulls(owner_repo, state="closed", since=since, org=True)
    )

    for issue, state in itertools.chain(open_issues_generator, closed_issues_generator):
        # Compare naive datetimes throughout; open PRs last until "now".
        created_at = iso8601.parse_date(issue["created_at"]).replace(tzinfo=None)
        if state == "open":
            closed_at = datetime.utcnow()
        else:
            closed_at = iso8601.parse_date(issue["closed_at"]).replace(tzinfo=None)
        issue['duration'] = closed_at - created_at

        if DEBUG:
            print("{pr[id]}: {pr[intext]} {state}".format(
                pr=issue, state=state
            ), file=sys.stderr)

        durations[state][issue['intext']].append(issue)
def show_pulls(labels=None, show_comments=False, state="open", since=None, org=False):
    """Print per-month counts of opened and merged edx-platform PRs as CSV
    lines "YYYY-MM,opened,merged", loading pull details per issue.
    """
    monthly = collections.defaultdict(lambda: {'opened': 0, 'merged': 0})
    for issue in get_pulls("edx/edx-platform", labels, state, since, org):
        issue.load_pull_details()
        monthly[yearmonth(issue['created_at'])]['opened'] += 1
        if issue['pull.merged']:
            monthly[yearmonth(issue['pull.merged_at'])]['merged'] += 1

    # NOTE(review): raw dict dump before the CSV output — looks like leftover
    # debugging; confirm it is wanted.
    print(monthly)
    for ym, counts in sorted(monthly.items()):
        print("{ym},{data[opened]},{data[merged]}".format(ym=ym, data=counts))
def one_repo(self, repo):
    """Classify this repo's open pull requests and collect them.

    Each PR is tagged internal/external (the "osc" label or a non-edX org
    means external), given a composite id and repo nick, then added once
    per team label it carries, or — if it carries none — added when it is
    external.
    """
    for issue in get_pulls(repo.name, state="open", org=True):
        # "osc" overrides the org check; otherwise only edX is internal.
        if "osc" in issue['labels']:
            intext = "external"
        elif issue['org'] == 'edX':
            intext = "internal"
        else:
            intext = "external"
        issue["intext"] = intext
        issue["id"] = "{}.{}".format(repo.name, issue["number"])
        issue["repo"] = repo.nick

        blocked = False
        for label in issue['labels']:
            if label == "osc":
                continue
            if label not in self.team_names:
                continue
            # NOTE(review): no break here, so a PR with several team labels
            # is added once per label (the sibling one_repo breaks after the
            # first add) — confirm this is intended.
            self.add_pull(issue)
            blocked = True
        if not blocked and intext == "external":
            self.add_pull(issue)
def get_opened_pulls(repo): """Produce a stream of pull requests.""" for issue in get_pulls(repo, state="all", org=True): if issue['intext'] == 'external': issue['created_at'] = iso8601.parse_date(issue["created_at"]).replace(tzinfo=None) yield issue
def show_pulls(labels=None, show_comments=False, state="open", since=None, org=False, intext=None, merged=False):
    """
    Print pull requests from every tracked repo, with optional filters.

    `labels`: Filters PRs by labels (all are shown if None is specified)
    `show_comments`: shows the last 5 comments on each PR, if True
    `state`: Filter PRs by this state (either 'open' or 'closed')
    `since`: a datetime representing the earliest time from which to pull
        information.  All PRs regardless of time are shown if None is
        specified.
    `org`: If True, sorts by PR author affiliation
    `intext`: specify 'int' (internal) or 'ext' (external) pull request
    `merged`: If True and state="closed", shows only PRs that were merged.

    Ends with a summary line: total PR count and total lines added/deleted.
    """
    num = 0
    adds = 0
    deletes = 0
    repos = [ r for r in Repo.from_yaml() if r.track_pulls ]
    for repo in repos:
        # org info is also needed when filtering by intext.
        issues = get_pulls(repo.name, labels, state, since, org=org or intext, pull_details="all")

        category = None
        for issue in issues:
            issue["repo"] = repo.nick
            if intext is not None:
                if issue["intext"] != intext:
                    continue
            if state == 'closed' and merged and issue['combinedstate'] != 'merged':
                # If we're filtering on closed PRs, and only want those that are merged,
                # skip ones that were closed without merge.
                continue
            if state == 'closed' and since:
                # If this PR was closed prior to the last `since` interval of days, continue on
                # (it may have been *updated* - that is, referenced or commented on - more recently,
                # but we just want to see what's been merged or closed in the past "since" days)
                closed_at = dateutil.parser.parse(issue["closed_at"][:-1])    # Remove TZ information
                if closed_at < since:
                    continue

            if org and issue.get("org") != category:
                # new category! print category header
                category = issue["org"]
                print("-- {category} ----".format(category=category))

            # Dead debug branch: flip to 1 to dump the raw issue object.
            if 0:
                import pprint
                pprint.pprint(issue.obj)

            print(issue.format(ISSUE_FMT))
            num += 1
            adds += issue['pull']['additions']
            deletes += issue['pull']['deletions']

            if show_comments:
                # Fetch comments newest-first, take five, print oldest-first.
                comments_url = URLObject(issue['comments_url'])
                comments_url = comments_url.set_query_param("sort", "created")
                comments_url = comments_url.set_query_param("direction", "desc")
                comments = paginated_get(comments_url)
                last_five_comments = reversed(more_itertools.take(5, comments))
                for comment in last_five_comments:
                    print(comment.format(COMMENT_FMT))

    print()
    print("{num} pull requests; {adds}+ {deletes}-".format(num=num, adds=adds, deletes=deletes))