def get_issue_data(issue):
    created_at = _github_dt(issue["created_at"])
    if issue['state'] == 'closed':
        closed_at = _github_dt(issue["closed_at"])
    else:
        closed_at = datetime.datetime.utcnow() + datetime.timedelta(days=1)
    return dict(created_at=created_at, closed_at=closed_at, labels=issue['labels'])
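# _github_dt is used throughout this section but never defined here. A minimal
# sketch of the assumed helper, based on GitHub's ISO-8601 API timestamps
# (e.g. "2016-01-02T03:04:05Z"); the real implementation may differ.
def _github_dt(dt_string):
    # GitHub API timestamps are UTC with a trailing "Z"
    return datetime.datetime.strptime(dt_string, '%Y-%m-%dT%H:%M:%SZ')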
def event_summary(events):
    issues = defaultdict(list)
    for event in events:
        issue_number = event.get('issue', {}).get('number')
        d = dict(
            dt=_github_dt(event['created_at']),
            action=event['event'],
            actor=event['actor']['login'],
            issue_number=issue_number,
            issue_state=event['issue']['state'],
            title=event['issue']['title'],
            sort_key=event['issue']['title'].split()[0],
            state=event['issue']['state'],
            labels=[x['name'] for x in event['issue']['labels']],
        )
        issues[issue_number].append(d)
        # sort newest first so data[0] reflects the issue's latest state
        issues[issue_number].sort(key=itemgetter('dt'), reverse=True)

    sorted_date = sorted([[e[0]['sort_key'], i, e] for i, e in issues.items()])
    for sort_key, issue_number, data in sorted_date:
        # print "%s %r" % (issue_number, map(str, [e['dt'] for e in data]))
        title = data[0]['title']
        actions = [x['action'] for x in data]
        state = data[0]['state'].upper()
        if state == "OPEN" and "RFR" in data[0]['labels']:
            state = "RFR"
        if state == "CLOSED" and "merged" in actions:
            state = "MERGED"
        print " * [#%s](https://github.com/%s/issues/%s) %s %s" % (
            issue_number, tornado.options.options.repo, issue_number, state, title)
def run(event_cache_dirs):
    min_dt = datetime.datetime.strptime(tornado.options.options.min_dt, '%Y-%m-%d')
    max_dt = datetime.datetime.strptime(
        tornado.options.options.max_dt, '%Y-%m-%d').replace(hour=23, minute=59)
    events = []
    for event_cache_dir in event_cache_dirs:
        for event_file in glob.glob(os.path.join(event_cache_dir, '*.json')):
            with open(event_file, 'r') as f:
                event_data = json.load(f)
            if is_filtered_out(event_data, min_dt, max_dt):
                continue
            events.append(event_data)
    dates = [_github_dt(event['created_at']) for event in events]
    logging.info("%d events from %s to %s", len(events), min(dates), max(dates))
    print "---"
    print ""
    print "**PR ChangeLog (from %s to %s)**" % (
        min_dt.strftime('%Y-%m-%d %A'), max_dt.strftime('%Y-%m-%d %A'))
    print ""
    event_summary(events)
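# These functions read min_dt, max_dt, repo, actor, and skip_event_type from
# tornado.options, but the declarations are not shown in this section. A
# hedged example of how they might be defined, inferred from the usage above
# (repo and skip_event_type are indexed/iterated, so multiple=True is assumed):
tornado.options.define("min_dt", type=str, help="YYYY-MM-DD start of the reporting window")
tornado.options.define("max_dt", type=str, help="YYYY-MM-DD end of the reporting window")
tornado.options.define("repo", type=str, multiple=True, help="owner/repo to report on")
tornado.options.define("actor", type=str, help="GitHub login to filter events by")
tornado.options.define("skip_event_type", type=str, multiple=True, help="event types to exclude")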
def run_interval():
    for comment in comments_for_interval(
            datetime.timedelta(days=tornado.options.options.interval)):
        dt = _github_dt(comment["created_at"])
        yyyymm = "%d-%02d" % (dt.year, dt.month)
        for feature in process_comment(comment):
            yield [yyyymm, feature.feature, feature.user, feature.value]
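# process_comment and the feature objects it yields are defined elsewhere.
# A hypothetical sketch of the expected shape, assuming a simple namedtuple
# with the three attributes accessed above; the real version presumably
# extracts richer per-comment metrics.
import collections

Feature = collections.namedtuple('Feature', ['feature', 'user', 'value'])

def process_comment(comment):
    # e.g. count one comment per author
    yield Feature(feature='comments', user=comment['user']['login'], value=1)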
def comments_for_interval(interval):
    assert isinstance(interval, datetime.timedelta)
    min_dt = datetime.datetime.utcnow() - interval
    # comments and reviews are cached separately but filtered identically
    for cache_dir in (tornado.options.options.comment_cache_dir,
                      tornado.options.options.review_cache_dir):
        for filename in glob.glob("%s/*.json" % cache_dir):
            with open(filename, 'r') as f:
                comment = json.load(f)
            if _github_dt(comment["created_at"]) < min_dt:
                logging.debug("skipping %s dt %s < %s",
                              comment["id"], comment["created_at"], min_dt)
                continue
            logging.debug('comment %r', comment)
            yield comment
def load_data():
    for issue_file in glob.glob(
            os.path.join(tornado.options.options.issue_cache_dir, '*.json')):
        with open(issue_file, 'r') as f:
            issue = json.load(f)
        if issue['state'] != 'open':
            continue
        if not issue.get("pull_request"):
            continue
        assignees = map(itemgetter('login'), issue['assignees'])
        if not assignees:
            assignees = [issue["user"]["login"]]
        yield dict(
            issue_number=issue["number"],
            title=issue['title'],
            assignees=assignees,
            updated_at=_github_dt(issue['updated_at']),
            created_at=_github_dt(issue['created_at']),
        )
def cache_comments(raw_comments):
    if not os.path.exists(tornado.options.options.comment_cache_dir):
        os.makedirs(tornado.options.options.comment_cache_dir)
    for comment in raw_comments:
        filename = os.path.join(tornado.options.options.comment_cache_dir,
                                "%d.json" % comment['id'])
        if os.path.exists(filename):
            logging.warning('unlinking existing filename %s', filename)
            os.unlink(filename)
        logging.info('creating %s', filename)
        with open(filename, 'w') as f:
            f.write(json.dumps(comment))
    if raw_comments:
        logging.info("dt %s", _github_dt(raw_comments[-1]["created_at"]))
def run_by_month():
    min_dt = None
    if tornado.options.options.min_dt:
        min_dt = datetime.datetime.strptime(tornado.options.options.min_dt, '%Y-%m-%d')
    for comment in load_comments():
        dt = _github_dt(comment["created_at"])
        if min_dt and dt < min_dt:
            logging.debug('skipping comment dt %s - %s', dt, min_dt)
            continue
        yyyymm = "%d-%02d" % (dt.year, dt.month)
        for feature in process_comment(comment):
            yield [yyyymm, feature.feature, feature.user, feature.value]
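# load_comments is not shown in this section. A minimal sketch, assuming it
# simply walks the same comment cache that cache_comments() writes:
def load_comments():
    for filename in glob.glob("%s/*.json" % tornado.options.options.comment_cache_dir):
        with open(filename, 'r') as f:
            yield json.load(f)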
def load_data(min_dt):
    for issue_file in glob.glob(
            os.path.join(tornado.options.options.issue_cache_dir, '*.json')):
        with open(issue_file, 'r') as f:
            issue = json.load(f)
        dt = _github_dt(issue['created_at'])
        if dt < min_dt:
            continue
        login = issue["user"]["login"]
        # pick a random one?
        for assignee in issue["assignees"]:
            login = assignee["login"]
        yield dict(
            issue_number=issue["number"],
            login=login,
            created_at=dt,
            labels=map(itemgetter('name'), issue["labels"]),
        )
def is_filtered_out(event, min_dt, max_dt):
    issue_number = event.get('issue', {}).get('number')
    if not issue_number:
        logging.warning('no issue number in %r', event)
        return True
    dt = _github_dt(event['created_at'])
    if dt < min_dt:
        return True
    if dt > max_dt:
        return True
    if not _is_event_related(event, tornado.options.options.actor):
        return True
    if tornado.options.options.skip_event_type and \
            event['event'] in tornado.options.options.skip_event_type:
        return True
    return False
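# _is_event_related is referenced above but not defined here. A hypothetical
# sketch, assuming an event is "related" when the given actor triggered it,
# authored the issue, or is assigned to it; the real predicate may check more.
def _is_event_related(event, actor):
    if not actor:
        return True
    if event['actor']['login'] == actor:
        return True
    if event['issue']['user']['login'] == actor:
        return True
    return actor in [a['login'] for a in event['issue'].get('assignees', [])]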
def event_summary(events):
    issues = defaultdict(list)
    for event in events:
        logging.debug('%s', event)
        issue_number = event.get('issue', {}).get('number')
        d = dict(
            dt=_github_dt(event['created_at']),
            action=event['event'],
            actor=event['actor']['login'],
            issue_number=issue_number,
            issue_state=event['issue']['state'],
            title=event['issue']['title'],
            sort_key=event['issue']['title'].split()[0],
            state=event['issue']['state'],
            labels=[x['name'] for x in event['issue']['labels']],
            html_url=event['issue']['html_url'],
            repo='/'.join(event['issue']['repository_url'].split('/')[-2:]),
        )
        issues[issue_number].append(d)
        # sort newest first so data[0] reflects the issue's latest state
        issues[issue_number].sort(key=itemgetter('dt'), reverse=True)

    sorted_date = sorted([[e[0]['sort_key'], i, e] for i, e in issues.items()])
    for sort_key, issue_number, data in sorted_date:
        # print "%s %r" % (issue_number, map(str, [e['dt'] for e in data]))
        title = data[0]['title']
        actions = [x['action'] for x in data]
        state = data[0]['state'].upper()
        repo = data[0]['repo']
        if repo == tornado.options.options.repo[0]:
            repo = ''
        if state == "OPEN" and "RFR" in data[0]['labels']:
            state = "RFR"
        if state == "CLOSED" and "merged" in actions:
            state = "MERGED"
        if state == "OPEN" and "WIP" in title:
            state = "WIP"
        print " * [%s#%s](%s) %s %s" % (
            repo, issue_number, data[0]['html_url'], state, title)