def setUp(self):
    cache = '/tmp/testcache'
    if os.path.isdir(cache):
        shutil.rmtree(cache)
    os.makedirs(cache)

    gh = GithubMock()
    ghw = GithubWrapper(gh, cachedir=cache)
    gr = ghw.get_repo('test/test')

    # FIXME - this should return a wrapped issue
    gi = gr.get_issue(1)

    self.iw = IssueWrapper(github=gh, repo=gr, issue=gi, cachedir=cache)
    self.iw.gitrepo = GitRepoWrapperMock()
def setUp(self):
    cache = '/tmp/testcache'
    if os.path.isdir(cache):
        shutil.rmtree(cache)
    os.makedirs(cache)

    gh = GithubMock()
    ghw = GithubWrapper(gh, cachedir=cache)
    gr = ghw.get_repo('test/test', verbose=False)

    # FIXME - this should return a wrapped issue
    gi = gr.get_issue(1)

    self.iw = IssueWrapper(github=gh, repo=gr, issue=gi, cachedir=cache)
    self.iw.file_indexer = FileIndexerMock()
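# A hedged sketch of a test that could sit next to the fixtures above; it only
# touches attributes (`number`, `html_url`) that the triager code in this repo
# reads elsewhere, and it assumes GithubMock populates them for issue #1.
def test_wrapper_exposes_mocked_issue(self):
    self.assertEqual(self.iw.number, 1)
    self.assertTrue(self.iw.html_url)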
class DefaultTriager:

    """
    How to use:

    1. Create a new class which inherits from DefaultTriager
    2. Implement the 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue:
            1. create 'actions = DefaultActions()'
            2. define which action(s) should be done by updating the 'actions' instance
            3. call the parent 'apply_actions' method: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:

        def main():
            Triager().start()
    """

    ITERATION = 0
    debug = False
    cachedir_base = None
    BOTNAMES = C.DEFAULT_BOT_NAMES
    CLOSING_LABELS = []

    def __init__(self, args=None):
        pass

    @classmethod
    def create_parser(cls):
        """Creates an argument parser

        Returns:
            An argparse.ArgumentParser object
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--cachedir", type=str, dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--logfile", type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--daemonize", action="store_true",
                            help="run in a continuous loop")
        parser.add_argument("--daemonize_interval", type=int, default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug", "-d", action="store_true",
                            help="Debug output")
        parser.add_argument("--verbose", "-v", action="store_true",
                            help="Verbose output")
        parser.add_argument("--dry-run", "-n", action="store_true",
                            help="Don't make any changes")
        parser.add_argument("--force", "-f", action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--pause", "-p", action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument("--force_rate_limit", action="store_true",
                            help="debug: force the rate limit")
        # useful for debugging
        parser.add_argument(
            "--dump_actions", action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument(
            "--botmetafile", type=str, default=None,
            help="Use this filepath for botmeta instead of from the repo")
        return parser

    def set_logger(self):
        set_logger(debug=self.debug, logfile=self.logfile)

    def start(self):
        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if self.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API"""
        if self.github_token:
            return Github(base_url=self.github_url,
                          login_or_token=self.github_token)
        else:
            return Github(base_url=self.github_url,
                          login_or_token=self.github_user,
                          password=self.github_pass)

    def is_pr(self, issue):
        if '/pull/' in issue.html_url:
            return True
        else:
            return False

    def is_issue(self, issue):
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub logins belonging to the organization
        """
        members = []

        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)
        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            members = mdata[1]
            if mdata[0] < gh_org.updated_at:
                update = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle.dump(mdata, f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Get members of the core team

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A list of GitHub logins belonging to the teams
        """
        members = set()

        conn = self._connect()
        gh_org = conn.get_organization(organization)
        for team in gh_org.get_teams():
            if team.name in teams:
                for member in team.get_members():
                    members.add(member.login)

        return sorted(members)

    #@RateLimited
    def get_valid_labels(self, repo):
        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        rw = self.ghw.get_repo(repo)
        vlabels = []
        for vl in rw.labels:
            vlabels.append(vl.name)

        return vlabels

    def loop(self):
        '''Call the run method in a defined interval'''
        while True:
            self.run()
            self.ITERATION += 1
            interval = self.daemonize_interval
            logging.info('sleep %ss (%sm)' % (interval, interval / 60))
            time.sleep(interval)

    @abc.abstractmethod
    def run(self):
        pass

    def get_current_time(self):
        return datetime.utcnow()

    def render_boilerplate(self, tvars, boilerplate=None):
        template = environment.get_template('%s.j2' % boilerplate)
        comment = template.render(**tvars)
        return comment

    def apply_actions(self, iw, actions):
        action_meta = {'REDO': False}

        if actions.count() > 0:
            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta

                cont = input("Take recommended actions (y/N/a/R/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""
        for commentid in actions.uncomment:
            iw.remove_comment_by_id(commentid)

        for comment in actions.comments:
            logging.info("action: comment - " + comment)
            iw.add_comment(comment=comment)

        if actions.close:
            for newlabel in actions.newlabel:
                if newlabel in self.CLOSING_LABELS:
                    logging.info('action: label - ' + newlabel)
                    iw.add_label(label=newlabel)

            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
        else:
            for unlabel in actions.unlabel:
                logging.info('action: unlabel - ' + unlabel)
                iw.remove_label(label=unlabel)
            for newlabel in actions.newlabel:
                logging.info('action: label - ' + newlabel)
                iw.add_label(label=newlabel)

            for user in actions.assign:
                logging.info('action: assign - ' + user)
                iw.assign_user(user)
            for user in actions.unassign:
                logging.info('action: unassign - ' + user)
                iw.unassign_user(user)

            if actions.merge:
                iw.merge()

        # FIXME why?
        self.build_history(iw)

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Check if a PR# has been merged or not'''
        if number is None:
            raise Exception('Can not check merge state on the number: None')

        merged = False
        pr = None
        try:
            pr = repo.get_pullrequest(number)
        except Exception as e:
            print(e)
        if pr:
            try:
                merged = pr.merged
            except Exception as e:
                logging.debug(e)
        return merged

    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit'''
        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            thisrepo = self.ghw.get_repo(self.repo, verbose=False)
            issues = thisrepo.repo.get_issues()
            rl = thisrepo.get_rate_limit()
            pprint(rl)

            for issue in issues:
                iw = IssueWrapper(github=self.ghw,
                                  repo=thisrepo,
                                  issue=issue,
                                  cachedir=cachedir)
                iw.history
                rl = thisrepo.get_rate_limit()
                pprint(rl)

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging'''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          to_text(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info(f'dumping {fn}')
        # json.dumps() returns text, so the file must be opened in text mode
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
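# A minimal sketch of the subclassing pattern described in the class docstring
# above. 'DefaultActions' and its list attributes are assumed to behave the way
# execute_actions() consumes them; the issue iteration helper is hypothetical.
class ExampleTriager(DefaultTriager):

    def run(self):
        for iw in self.iter_issues():                 # hypothetical helper yielding IssueWrapper objects
            actions = DefaultActions()
            actions.comments.append('Thanks for the report!')
            actions.newlabel.append('needs_triage')   # illustrative label
            self.apply_actions(iw, actions)


def main():
    ExampleTriager().start()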
class DefaultTriager(object): ITERATION = 0 def __init__(self): parser = self.create_parser() args = parser.parse_args() for x in vars(args): val = getattr(args, x) setattr(self, x, val) self.last_run = None self.github_user = C.DEFAULT_GITHUB_USERNAME self.github_pass = C.DEFAULT_GITHUB_PASSWORD self.github_token = C.DEFAULT_GITHUB_TOKEN # where to store junk self.cachedir_base = os.path.expanduser(self.cachedir_base) self.set_logger() logging.info('starting bot') # connect to github logging.info('creating api connection') self.gh = self._connect() # wrap the connection logging.info('creating api wrapper') self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base) @classmethod def create_parser(cls): """Creates an argument parser Returns: A argparse.ArgumentParser object """ parser = argparse.ArgumentParser() parser.add_argument("--cachedir", type=str, dest='cachedir_base', default='~/.ansibullbot/cache') parser.add_argument("--logfile", type=str, default='/var/log/ansibullbot.log', help="Send logging to this file") parser.add_argument("--daemonize", action="store_true", help="run in a continuos loop") parser.add_argument("--daemonize_interval", type=int, default=(30 * 60), help="seconds to sleep between loop iterations") parser.add_argument("--debug", "-d", action="store_true", help="Debug output") parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") parser.add_argument("--dry-run", "-n", action="store_true", help="Don't make any changes") parser.add_argument("--force", "-f", action="store_true", help="Do not ask questions") parser.add_argument("--pause", "-p", action="store_true", dest="always_pause", help="Always pause between prs|issues") parser.add_argument("--force_rate_limit", action="store_true", help="debug: force the rate limit") parser.add_argument("--force_description_fixer", action="store_true", help="Always invoke the description fixer") # useful for debugging parser.add_argument( "--dump_actions", action="store_true", help="serialize the actions to disk [/tmp/actions]") parser.add_argument( "--botmetafile", type=str, default=None, help="Use this filepath for botmeta instead of from the repo") return parser def set_logger(self): if self.debug: logging.level = logging.DEBUG else: logging.level = logging.INFO logFormatter = \ logging.Formatter("%(asctime)s %(levelname)s %(message)s") rootLogger = logging.getLogger() if self.debug: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) logdir = os.path.dirname(self.logfile) if logdir and not os.path.isdir(logdir): os.makedirs(logdir) fileHandler = WatchedFileHandler(self.logfile) fileHandler.setFormatter(logFormatter) rootLogger.addHandler(fileHandler) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logFormatter) rootLogger.addHandler(consoleHandler) def start(self): if self.force_rate_limit: logging.warning('attempting to trigger rate limit') self.trigger_rate_limit() return if self.daemonize: logging.info('starting daemonize loop') self.loop() else: logging.info('starting single run') self.run() logging.info('stopping bot') @RateLimited def _connect(self): """Connects to GitHub's API""" if self.github_token: return Github(login_or_token=self.github_token) else: return Github(login_or_token=self.github_user, password=self.github_pass) def is_pr(self, issue): if '/pull/' in issue.html_url: return True else: return False def is_issue(self, issue): return not self.is_pr(issue) @RateLimited def get_members(self, organization): """Get members of an 
organization Args: organization: name of the organization Returns: A list of GitHub login belonging to the organization """ members = [] update = False write_cache = False now = self.get_current_time() gh_org = self._connect().get_organization(organization) cachedir = os.path.join(self.cachedir_base, organization) if not os.path.isdir(cachedir): os.makedirs(cachedir) cachefile = os.path.join(cachedir, 'members.pickle') if os.path.isfile(cachefile): with open(cachefile, 'rb') as f: mdata = pickle.load(f) members = mdata[1] if mdata[0] < gh_org.updated_at: update = True else: update = True write_cache = True if update: members = gh_org.get_members() members = [x.login for x in members] # save the data if write_cache: mdata = [now, members] with open(cachefile, 'wb') as f: pickle.dump(mdata, f) return members @RateLimited def get_core_team(self, organization, teams): """Get members of the core team Args: organization: name of the teams' organization teams: list of teams that compose the project core team Returns: A list of GitHub login belonging to teams """ members = set() conn = self._connect() gh_org = conn.get_organization(organization) for team in gh_org.get_teams(): if team.name in teams: for member in team.get_members(): members.add(member.login) return sorted(members) #@RateLimited def get_valid_labels(self, repo): # use the repo wrapper to enable caching+updating if not self.ghw: self.gh = self._connect() self.ghw = GithubWrapper(self.gh) rw = self.ghw.get_repo(repo) vlabels = [] for vl in rw.get_labels(): vlabels.append(vl.name) return vlabels def loop(self): '''Call the run method in a defined interval''' while True: self.run() self.ITERATION += 1 interval = self.daemonize_interval logging.info('sleep %ss (%sm)' % (interval, interval / 60)) time.sleep(interval) @abc.abstractmethod def run(self): pass def get_current_time(self): return datetime.utcnow() def render_boilerplate(self, tvars, boilerplate=None): template = environment.get_template('%s.j2' % boilerplate) comment = template.render(**tvars) return comment def apply_actions(self, iw, actions): action_meta = {'REDO': False} if actions.count() > 0: if self.dump_actions: self.dump_action_dict(iw, actions) if self.dry_run: print("Dry-run specified, skipping execution of actions") else: if self.force: print("Running actions non-interactive as you forced.") self.execute_actions(iw, actions) return action_meta cont = raw_input( "Take recommended actions (y/N/a/R/T/DEBUG)? ") if cont in ('a', 'A'): sys.exit(0) if cont in ('Y', 'y'): self.execute_actions(iw, actions) if cont == 'T': self.template_wizard(iw) action_meta['REDO'] = True if cont == 'r' or cont == 'R': action_meta['REDO'] = True if cont == 'DEBUG': # put the user into a breakpoint to do live debug action_meta['REDO'] = True import epdb epdb.st() elif self.always_pause: print("Skipping, but pause.") cont = raw_input("Continue (Y/n/a/R/T/DEBUG)? 
") if cont in ('a', 'A', 'n', 'N'): sys.exit(0) if cont == 'T': self.template_wizard(iw) action_meta['REDO'] = True elif cont == 'REDO': action_meta['REDO'] = True elif cont == 'DEBUG': # put the user into a breakpoint to do live debug import epdb epdb.st() action_meta['REDO'] = True elif self.force_description_fixer: # FIXME: self.FIXED_ISSUES not defined since 1cf9674cd38edbd17aff906d72296c99043e5c13 # either define self.FIXED_ISSUES, either remove this method # FIXME force_description_fixer is not known by DefaultTriager (only # by AnsibleTriage): if not removed, move it to AnsibleTriage if iw.html_url not in self.FIXED_ISSUES: if self.meta['template_missing_sections']: changed = self.template_wizard(iw) if changed: action_meta['REDO'] = True self.FIXED_ISSUES.append(iw.html_url) else: print("Skipping.") # let the upper level code redo this issue return action_meta def template_wizard(self, iw): DF = DescriptionFixer(iw, self.meta) old = iw.body old_lines = old.split('\n') new = DF.new_description new_lines = new.split('\n') total_lines = len(new_lines) if len(old_lines) > total_lines: total_lines = len(old_lines) if len(new_lines) < total_lines: delta = total_lines - len(new_lines) for x in xrange(0, delta): new_lines.append('') if len(old_lines) < total_lines: delta = total_lines - len(old_lines) for x in xrange(0, delta): old_lines.append('') line = '--------------------------------------------------------' padding = 100 print("%s|%s" % (line.ljust(padding), line)) for c1, c2 in zip(old_lines, new_lines): if len(c1) > padding: c1 = c1[:padding - 4] if len(c2) > padding: c2 = c2[:padding - 4] print("%s|%s" % (c1.rstrip().ljust(padding), c2.rstrip())) print("%s|%s" % (line.rstrip().ljust(padding), line)) print('# ' + iw.html_url) cont = raw_input("Apply this new description? 
(Y/N) ") if cont == 'Y': iw.set_description(DF.new_description) return True else: return False def execute_actions(self, iw, actions): """Turns the actions into API calls""" for comment in actions.comments: logging.info("acton: comment - " + comment) iw.add_comment(comment=comment) if actions.close: # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263 logging.info('action: close') iw.instance.edit(state='closed') return for unlabel in actions.unlabel: logging.info('action: unlabel - ' + unlabel) iw.remove_label(label=unlabel) for newlabel in actions.newlabel: logging.info('action: label - ' + newlabel) iw.add_label(label=newlabel) for user in actions.assign: logging.info('action: assign - ' + user) iw.assign_user(user) for user in actions.unassign: logging.info('action: unassign - ' + user) iw.unassign_user(user) if actions.merge: iw.merge() #@RateLimited def is_pr_merged(self, number, repo): '''Check if a PR# has been merged or not''' if number is None: import epdb epdb.st() raise Exception('Can not check merge state on the number: None') merged = False pr = None try: pr = repo.get_pullrequest(number) except Exception as e: print(e) if pr: try: merged = pr.merged except Exception as e: logging.debug(e) import epdb epdb.st() return merged def trigger_rate_limit(self): '''Repeatedly make calls to exhaust rate limit''' self.gh = self._connect() self.ghw = GithubWrapper(self.gh) while True: cachedir = os.path.join(self.cachedir_base, self.repo) thisrepo = self.ghw.get_repo(self.repo, verbose=False) issues = thisrepo.repo.get_issues() rl = thisrepo.get_rate_limit() pprint(rl) for issue in issues: iw = IssueWrapper(github=self.ghw, repo=thisrepo, issue=issue, cachedir=cachedir) iw.history rl = thisrepo.get_rate_limit() pprint(rl) def dump_action_dict(self, issue, actions): '''Serialize the action dict to disk for quick(er) debugging''' fn = os.path.join('/tmp', 'actions', issue.repo_full_name, str(issue.number) + '.json') dn = os.path.dirname(fn) if not os.path.isdir(dn): os.makedirs(dn) logging.info('dumping {}'.format(fn)) with open(fn, 'wb') as f: f.write(json.dumps(actions, indent=2, sort_keys=True))
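# A hedged sketch of how run() in a subclass might honor the REDO flag that
# apply_actions() returns above ("let the upper level code redo this issue").
# The issue source and the action population are illustrative only.
def run(self):
    for iw in self.iter_issues():          # hypothetical helper yielding IssueWrapper objects
        redo = True
        while redo:
            actions = DefaultActions()
            # ... decide on labels/comments for this issue ...
            action_meta = self.apply_actions(iw, actions)
            redo = action_meta['REDO']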
class DefaultTriager(object): ITERATION = 0 def __init__(self, args): self.args = args self.last_run = None self.daemonize = None self.daemonize_interval = None self.dry_run = False self.force = False self.configfile = self.args.configfile self.config = ConfigParser.ConfigParser() self.config.read([self.configfile]) try: self.github_user = self.config.get('defaults', 'github_username') except: self.github_user = None try: self.github_pass = self.config.get('defaults', 'github_password') except: self.github_pass = None try: self.github_token = self.config.get('defaults', 'github_token') except: self.github_token = None self.repopath = self.args.repo self.logfile = self.args.logfile # where to store junk self.cachedir = self.args.cachedir self.cachedir = os.path.expanduser(self.cachedir) self.cachedir_base = self.cachedir self.set_logger() logging.info('starting bot') logging.debug('setting bot attributes') for x in vars(self.args): val = getattr(self.args, x) setattr(self, x, val) if hasattr(self.args, 'pause') and self.args.pause: self.always_pause = True # connect to github logging.info('creating api connection') self.gh = self._connect() # wrap the connection logging.info('creating api wrapper') self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir) # get valid labels logging.info('getting labels') self.valid_labels = self.get_valid_labels(self.repopath) @property def resume(self): '''Returns a dict with the last issue repo+number processed''' if not hasattr(self, 'args'): return None if hasattr(self.args, 'pr') and self.args.pr: return None if not hasattr(self.args, 'resume'): return None if not self.args.resume: return None if hasattr(self, 'cachedir_base'): resume_file = os.path.join(self.cachedir_base, 'resume.json') else: resume_file = os.path.join(self.cachedir, 'resume.json') if not os.path.isfile(resume_file): return None with open(resume_file, 'rb') as f: data = json.loads(f.read()) return data def set_resume(self, repo, number): if not hasattr(self, 'args'): return None if hasattr(self.args, 'pr') and self.args.pr: return None if not hasattr(self.args, 'resume'): return None if not self.args.resume: return None data = {'repo': repo, 'number': number} if hasattr(self, 'cachedir_base'): resume_file = os.path.join(self.cachedir_base, 'resume.json') else: resume_file = os.path.join(self.cachedir, 'resume.json') with open(resume_file, 'wb') as f: f.write(json.dumps(data, indent=2)) def set_logger(self): if hasattr(self.args, 'debug') and self.args.debug: logging.level = logging.DEBUG else: logging.level = logging.INFO logFormatter = \ logging.Formatter("%(asctime)s %(levelname)s %(message)s") rootLogger = logging.getLogger() if hasattr(self.args, 'debug') and self.args.debug: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) if hasattr(self.args, 'logfile'): logfile = self.args.logfile else: logfile = '/tmp/ansibullbot.log' logdir = os.path.dirname(logfile) if logdir and not os.path.isdir(logdir): os.makedirs(logdir) fileHandler = logging.FileHandler(logfile) fileHandler.setFormatter(logFormatter) rootLogger.addHandler(fileHandler) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logFormatter) rootLogger.addHandler(consoleHandler) def start(self): if hasattr(self.args, 'force_rate_limit') and \ self.args.force_rate_limit: logging.warning('attempting to trigger rate limit') self.trigger_rate_limit() return if hasattr(self.args, 'daemonize') and self.args.daemonize: logging.info('starting daemonize loop') self.loop() else: 
logging.info('starting single run') self.run() logging.info('stopping bot') @RateLimited def _connect(self): """Connects to GitHub's API""" if self.github_token: return Github(login_or_token=self.github_token) else: return Github(login_or_token=self.github_user, password=self.github_pass) @abc.abstractmethod def _get_repo_path(self): pass def is_pr(self, issue): if '/pull/' in issue.html_url: return True else: return False def is_issue(self, issue): return not self.is_pr(issue) @RateLimited def get_members(self, organization): """Get members of an organization Args: organization: name of the organization Returns: A list of GitHub login belonging to the organization """ members = [] update = False write_cache = False now = self.get_current_time() gh_org = self._connect().get_organization(organization) cachedir = self.cachedir if cachedir.endswith('/issues'): cachedir = os.path.dirname(cachedir) cachefile = os.path.join(cachedir, 'members.pickle') if not os.path.isdir(cachedir): os.makedirs(cachedir) if os.path.isfile(cachefile): with open(cachefile, 'rb') as f: mdata = pickle.load(f) members = mdata[1] if mdata[0] < gh_org.updated_at: update = True else: update = True write_cache = True if update: members = gh_org.get_members() members = [x.login for x in members] # save the data if write_cache: mdata = [now, members] with open(cachefile, 'wb') as f: pickle.dump(mdata, f) return members @RateLimited def get_core_team(self, organization, teams): """Get members of the core team Args: organization: name of the teams' organization teams: list of teams that compose the project core team Returns: A list of GitHub login belonging to teams """ members = set() conn = self._connect() gh_org = conn.get_organization(organization) for team in gh_org.get_teams(): if team.name in teams: for member in team.get_members(): members.add(member.login) return sorted(members) #@RateLimited def get_valid_labels(self, repo=None): # use the repo wrapper to enable caching+updating if not self.ghw: self.gh = self._connect() self.ghw = GithubWrapper(self.gh) if not repo: # OLD workflow self.repo = self.ghw.get_repo(self._get_repo_path()) vlabels = [] for vl in self.repo.get_labels(): vlabels.append(vl.name) else: # v3 workflow rw = self.ghw.get_repo(repo) vlabels = [] for vl in rw.get_labels(): vlabels.append(vl.name) return vlabels def debug(self, msg=""): """Prints debug message if verbosity is given""" if self.verbose: print("Debug: " + msg) def loop(self): '''Call the run method in a defined interval''' while True: self.run() self.ITERATION += 1 interval = self.args.daemonize_interval logging.info('sleep %ss (%sm)' % (interval, interval / 60)) time.sleep(interval) @abc.abstractmethod def run(self): pass def get_current_time(self): return datetime.utcnow() def render_boilerplate(self, tvars, boilerplate=None): template = environment.get_template('%s.j2' % boilerplate) comment = template.render(**tvars) return comment def check_safe_match(self, iw, actions): """ Turn force on or off depending on match characteristics """ safe_match = False if actions.count() == 0: safe_match = True elif not actions.close and not actions.unlabel: if len(actions.newlabel) == 1: if actions.newlabel[0].startswith('affects_'): safe_match = True else: safe_match = False if self.module: if self.module in iw.instance.title.lower(): safe_match = True # be more lenient on re-notifications if not safe_match: if not actions.close and \ not actions.unlabel and \ not actions.newlabel: if len(actions.comments) == 1: if 'still waiting' in 
actions.comments[0]: safe_match = True if safe_match: self.force = True else: self.force = False def apply_actions(self, iw, actions): action_meta = {'REDO': False} if hasattr(self, 'safe_force') and self.safe_force: self.check_safe_match(iw, actions) if actions.count() > 0: if hasattr(self, 'args'): if hasattr(self.args, 'dump_actions'): if self.args.dump_actions: self.dump_action_dict(iw, actions) if self.dry_run: print("Dry-run specified, skipping execution of actions") else: if self.force: print("Running actions non-interactive as you forced.") self.execute_actions(iw, actions) return action_meta cont = raw_input( "Take recommended actions (y/N/a/R/T/DEBUG)? ") if cont in ('a', 'A'): sys.exit(0) if cont in ('Y', 'y'): self.execute_actions(iw, actions) if cont == 'T': self.template_wizard(iw) action_meta['REDO'] = True if cont == 'r' or cont == 'R': action_meta['REDO'] = True if cont == 'DEBUG': # put the user into a breakpoint to do live debug action_meta['REDO'] = True import epdb epdb.st() elif self.always_pause: print("Skipping, but pause.") cont = raw_input("Continue (Y/n/a/R/T/DEBUG)? ") if cont in ('a', 'A', 'n', 'N'): sys.exit(0) if cont == 'T': self.template_wizard(iw) action_meta['REDO'] = True elif cont == 'REDO': action_meta['REDO'] = True elif cont == 'DEBUG': # put the user into a breakpoint to do live debug import epdb epdb.st() action_meta['REDO'] = True elif hasattr(self, 'force_description_fixer' ) and self.args.force_description_fixer: if iw.html_url not in self.FIXED_ISSUES: if self.meta['template_missing_sections']: changed = self.template_wizard(iw) if changed: action_meta['REDO'] = True self.FIXED_ISSUES.append(iw.html_url) else: print("Skipping.") # let the upper level code redo this issue return action_meta def template_wizard(self, iw): DF = DescriptionFixer(iw, self.meta) old = iw.body old_lines = old.split('\n') new = DF.new_description new_lines = new.split('\n') total_lines = len(new_lines) if len(old_lines) > total_lines: total_lines = len(old_lines) if len(new_lines) < total_lines: delta = total_lines - len(new_lines) for x in xrange(0, delta): new_lines.append('') if len(old_lines) < total_lines: delta = total_lines - len(old_lines) for x in xrange(0, delta): old_lines.append('') line = '--------------------------------------------------------' padding = 100 print("%s|%s" % (line.ljust(padding), line)) for c1, c2 in zip(old_lines, new_lines): if len(c1) > padding: c1 = c1[:padding - 4] if len(c2) > padding: c2 = c2[:padding - 4] print("%s|%s" % (c1.rstrip().ljust(padding), c2.rstrip())) print("%s|%s" % (line.rstrip().ljust(padding), line)) print('# ' + iw.html_url) cont = raw_input("Apply this new description? 
(Y/N) ") if cont == 'Y': iw.set_description(DF.new_description) return True else: return False def execute_actions(self, iw, actions): """Turns the actions into API calls""" for comment in actions.comments: logging.info("acton: comment - " + comment) iw.add_comment(comment=comment) if actions.close: # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263 logging.info('action: close') iw.instance.edit(state='closed') return for unlabel in actions.unlabel: logging.info('action: unlabel - ' + unlabel) iw.remove_label(label=unlabel) for newlabel in actions.newlabel: logging.info('action: label - ' + newlabel) iw.add_label(label=newlabel) for user in actions.assign: logging.info('action: assign - ' + user) iw.assign_user(user) for user in actions.unassign: logging.info('action: unassign - ' + user) iw.unassign_user(user) if actions.merge: iw.merge() @RateLimited def is_pr_merged(self, number, repo=None): '''Check if a PR# has been merged or not''' merged = False pr = None try: if not repo: pr = self.repo.get_pullrequest(number) else: pr = repo.get_pullrequest(number) except Exception as e: print(e) if pr: merged = pr.merged return merged def dump_action_dict(self, issue, actions): '''Serialize the action dict to disk for quick(er) debugging''' fn = os.path.join('/tmp', 'actions', issue.repo_full_name, str(issue.number) + '.json') dn = os.path.dirname(fn) if not os.path.isdir(dn): os.makedirs(dn) logging.info('dumping {}'.format(fn)) with open(fn, 'wb') as f: f.write(json.dumps(actions, indent=2, sort_keys=True))
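# A hedged sketch of the resume bookkeeping defined above: inside a run() loop a
# subclass can checkpoint after each processed issue so a later invocation with
# --resume starts nearby. The repo name and number are placeholders.
def _checkpoint(self, repopath, number):
    self.set_resume(repopath, number)
    # resume.json in the cache dir then holds, e.g.:
    #   {"repo": "ansible/ansible", "number": 12345}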
class DefaultTriager(object): """ How to use: 1. Create a new class which inherits from DefaultTriager 2. Implement 'Triager.run(self)' method: - iterate over issues/pull requests - for each issue 1. create 'actions = DefaultActions()' 2. define which action(s) should be done updating 'actions' instance 3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)' 3. Run: def main(): Triager().start() """ ITERATION = 0 def __init__(self): parser = self.create_parser() args = parser.parse_args() for x in vars(args): val = getattr(args, x) setattr(self, x, val) self.last_run = None self.github_url = C.DEFAULT_GITHUB_URL self.github_user = C.DEFAULT_GITHUB_USERNAME self.github_pass = C.DEFAULT_GITHUB_PASSWORD self.github_token = C.DEFAULT_GITHUB_TOKEN # where to store junk self.cachedir_base = os.path.expanduser(self.cachedir_base) self.set_logger() logging.info('starting bot') # connect to github logging.info('creating api connection') self.gh = self._connect() # wrap the connection logging.info('creating api wrapper') self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base) @classmethod def create_parser(cls): """Creates an argument parser Returns: A argparse.ArgumentParser object """ parser = argparse.ArgumentParser() parser.add_argument("--cachedir", type=str, dest='cachedir_base', default='~/.ansibullbot/cache') parser.add_argument("--logfile", type=str, default='/var/log/ansibullbot.log', help="Send logging to this file") parser.add_argument("--daemonize", action="store_true", help="run in a continuos loop") parser.add_argument("--daemonize_interval", type=int, default=(30 * 60), help="seconds to sleep between loop iterations") parser.add_argument("--debug", "-d", action="store_true", help="Debug output") parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") parser.add_argument("--dry-run", "-n", action="store_true", help="Don't make any changes") parser.add_argument("--force", "-f", action="store_true", help="Do not ask questions") parser.add_argument("--pause", "-p", action="store_true", dest="always_pause", help="Always pause between prs|issues") parser.add_argument("--force_rate_limit", action="store_true", help="debug: force the rate limit") parser.add_argument("--force_description_fixer", action="store_true", help="Always invoke the description fixer") # useful for debugging parser.add_argument("--dump_actions", action="store_true", help="serialize the actions to disk [/tmp/actions]") parser.add_argument("--botmetafile", type=str, default=None, help="Use this filepath for botmeta instead of from the repo") return parser def set_logger(self): if self.debug: logging.level = logging.DEBUG else: logging.level = logging.INFO logFormatter = \ logging.Formatter("%(asctime)s %(levelname)s %(message)s") rootLogger = logging.getLogger() if self.debug: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) logdir = os.path.dirname(self.logfile) if logdir and not os.path.isdir(logdir): os.makedirs(logdir) fileHandler = WatchedFileHandler(self.logfile) fileHandler.setFormatter(logFormatter) rootLogger.addHandler(fileHandler) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logFormatter) rootLogger.addHandler(consoleHandler) def start(self): if self.force_rate_limit: logging.warning('attempting to trigger rate limit') self.trigger_rate_limit() return if self.daemonize: logging.info('starting daemonize loop') self.loop() else: logging.info('starting single run') self.run() logging.info('stopping 
bot') @RateLimited def _connect(self): """Connects to GitHub's API""" if self.github_token: return Github(base_url=self.github_url, login_or_token=self.github_token) else: return Github( base_url=self.github_url, login_or_token=self.github_user, password=self.github_pass ) def is_pr(self, issue): if '/pull/' in issue.html_url: return True else: return False def is_issue(self, issue): return not self.is_pr(issue) @RateLimited def get_members(self, organization): """Get members of an organization Args: organization: name of the organization Returns: A list of GitHub login belonging to the organization """ members = [] update = False write_cache = False now = self.get_current_time() gh_org = self._connect().get_organization(organization) cachedir = os.path.join(self.cachedir_base, organization) if not os.path.isdir(cachedir): os.makedirs(cachedir) cachefile = os.path.join(cachedir, 'members.pickle') if os.path.isfile(cachefile): with open(cachefile, 'rb') as f: mdata = pickle_load(f) members = mdata[1] if mdata[0] < gh_org.updated_at: update = True else: update = True write_cache = True if update: members = gh_org.get_members() members = [x.login for x in members] # save the data if write_cache: mdata = [now, members] with open(cachefile, 'wb') as f: pickle_dump(mdata, f) return members @RateLimited def get_core_team(self, organization, teams): """Get members of the core team Args: organization: name of the teams' organization teams: list of teams that compose the project core team Returns: A list of GitHub login belonging to teams """ members = set() conn = self._connect() gh_org = conn.get_organization(organization) for team in gh_org.get_teams(): if team.name in teams: for member in team.get_members(): members.add(member.login) return sorted(members) #@RateLimited def get_valid_labels(self, repo): # use the repo wrapper to enable caching+updating if not self.ghw: self.gh = self._connect() self.ghw = GithubWrapper(self.gh) rw = self.ghw.get_repo(repo) vlabels = [] for vl in rw.get_labels(): vlabels.append(vl.name) return vlabels def loop(self): '''Call the run method in a defined interval''' while True: self.run() self.ITERATION += 1 interval = self.daemonize_interval logging.info('sleep %ss (%sm)' % (interval, interval / 60)) time.sleep(interval) @abc.abstractmethod def run(self): pass def get_current_time(self): return datetime.utcnow() def render_boilerplate(self, tvars, boilerplate=None): template = environment.get_template('%s.j2' % boilerplate) comment = template.render(**tvars) return comment def apply_actions(self, iw, actions): action_meta = {'REDO': False} if actions.count() > 0: if self.dump_actions: self.dump_action_dict(iw, actions) if self.dry_run: print("Dry-run specified, skipping execution of actions") else: if self.force: print("Running actions non-interactive as you forced.") self.execute_actions(iw, actions) return action_meta cont = input("Take recommended actions (y/N/a/R/T/DEBUG)? ") if cont in ('a', 'A'): sys.exit(0) if cont in ('Y', 'y'): self.execute_actions(iw, actions) if cont == 'T': self.template_wizard(iw) action_meta['REDO'] = True if cont in ('r', 'R'): action_meta['REDO'] = True if cont == 'DEBUG': # put the user into a breakpoint to do live debug action_meta['REDO'] = True import epdb; epdb.st() elif self.always_pause: print("Skipping, but pause.") cont = input("Continue (Y/n/a/R/T/DEBUG)? 
") if cont in ('a', 'A', 'n', 'N'): sys.exit(0) if cont == 'T': self.template_wizard(iw) action_meta['REDO'] = True elif cont in ('r', 'R'): action_meta['REDO'] = True elif cont == 'DEBUG': # put the user into a breakpoint to do live debug import epdb; epdb.st() action_meta['REDO'] = True elif self.force_description_fixer: # FIXME: self.FIXED_ISSUES not defined since 1cf9674cd38edbd17aff906d72296c99043e5c13 # either define self.FIXED_ISSUES, either remove this method # FIXME force_description_fixer is not known by DefaultTriager (only # by AnsibleTriage): if not removed, move it to AnsibleTriage if iw.html_url not in self.FIXED_ISSUES: if self.meta['template_missing_sections']: changed = self.template_wizard(iw) if changed: action_meta['REDO'] = True self.FIXED_ISSUES.append(iw.html_url) else: print("Skipping.") # let the upper level code redo this issue return action_meta def template_wizard(self, iw): DF = DescriptionFixer(iw, self.meta) old = iw.body old_lines = old.split('\n') new = DF.new_description new_lines = new.split('\n') total_lines = len(new_lines) if len(old_lines) > total_lines: total_lines = len(old_lines) if len(new_lines) < total_lines: delta = total_lines - len(new_lines) for x in xrange(0, delta): new_lines.append('') if len(old_lines) < total_lines: delta = total_lines - len(old_lines) for x in xrange(0, delta): old_lines.append('') line = '--------------------------------------------------------' padding = 100 print("%s|%s" % (line.ljust(padding), line)) for c1, c2 in zip(old_lines, new_lines): if len(c1) > padding: c1 = c1[:padding-4] if len(c2) > padding: c2 = c2[:padding-4] print("%s|%s" % (c1.rstrip().ljust(padding), c2.rstrip())) print("%s|%s" % (line.rstrip().ljust(padding), line)) print('# ' + iw.html_url) cont = input("Apply this new description? 
(Y/N) ") if cont == 'Y': iw.set_description(DF.new_description) return True else: return False def execute_actions(self, iw, actions): """Turns the actions into API calls""" for comment in actions.comments: logging.info("acton: comment - " + comment) iw.add_comment(comment=comment) if actions.close: # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263 logging.info('action: close') iw.instance.edit(state='closed') return for unlabel in actions.unlabel: logging.info('action: unlabel - ' + unlabel) iw.remove_label(label=unlabel) for newlabel in actions.newlabel: logging.info('action: label - ' + newlabel) iw.add_label(label=newlabel) for user in actions.assign: logging.info('action: assign - ' + user) iw.assign_user(user) for user in actions.unassign: logging.info('action: unassign - ' + user) iw.unassign_user(user) if actions.merge: iw.merge() #@RateLimited def is_pr_merged(self, number, repo): '''Check if a PR# has been merged or not''' if number is None: if C.DEFAULT_BREAKPOINTS: logging.error('breakpoint!') import epdb; epdb.st() raise Exception('Can not check merge state on the number: None') merged = False pr = None try: pr = repo.get_pullrequest(number) except Exception as e: print(e) if pr: try: merged = pr.merged except Exception as e: logging.debug(e) if C.DEFAULT_BREAKPOINTS: logging.error('breakpoint!') import epdb; epdb.st() return merged def trigger_rate_limit(self): '''Repeatedly make calls to exhaust rate limit''' self.gh = self._connect() self.ghw = GithubWrapper(self.gh) while True: cachedir = os.path.join(self.cachedir_base, self.repo) thisrepo = self.ghw.get_repo(self.repo, verbose=False) issues = thisrepo.repo.get_issues() rl = thisrepo.get_rate_limit() pprint(rl) for issue in issues: iw = IssueWrapper( github=self.ghw, repo=thisrepo, issue=issue, cachedir=cachedir ) iw.history rl = thisrepo.get_rate_limit() pprint(rl) def dump_action_dict(self, issue, actions): '''Serialize the action dict to disk for quick(er) debugging''' fn = os.path.join(u'/tmp', u'actions', issue.repo_full_name, to_text(issue.number) + u'.json') dn = os.path.dirname(fn) if not os.path.isdir(dn): os.makedirs(dn) logging.info('dumping {}'.format(fn)) with open(fn, 'wb') as f: f.write(json.dumps(actions, indent=2, sort_keys=True))
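# A hedged sketch of the per-organization member cache that get_members() above
# maintains: a pickled two-element list of [fetch timestamp, login list]. The
# cache directory is the parser's default; the organization name is illustrative.
members_cache = os.path.join(os.path.expanduser('~/.ansibullbot/cache'),
                             'ansible', 'members.pickle')
with open(members_cache, 'rb') as f:
    fetched_at, logins = pickle_load(f)   # datetime of the fetch, list of GitHub login strings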
class DefaultTriager: """ How to use: 1. Create a new class which inherits from DefaultTriager 2. Implement 'Triager.run(self)' method: - iterate over issues/pull requests - for each issue 1. create 'actions = DefaultActions()' 2. define which action(s) should be done updating 'actions' instance 3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)' 3. Run: def main(): Triager().start() """ CLOSING_LABELS = [] def __init__(self, args=None): parser = self.create_parser() self.args = parser.parse_args(args) logging.info('starting bot') self.set_logger() self.cachedir_base = os.path.expanduser(self.args.cachedir_base) self.issue_summaries = {} self.repos = {} # resume is just an overload for the start-at argument resume = self.get_resume() if resume: if self.args.sort == 'desc': self.args.start_at = resume['number'] - 1 else: self.args.start_at = resume['number'] + 1 logging.info('creating api wrapper') self.ghw = GithubWrapper(url=C.DEFAULT_GITHUB_URL, user=C.DEFAULT_GITHUB_USERNAME, passw=C.DEFAULT_GITHUB_PASSWORD, token=C.DEFAULT_GITHUB_TOKEN, cachedir=self.cachedir_base) logging.info('creating graphql client') self.gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN, server=C.DEFAULT_GITHUB_URL) self._maintainer_team = None @property def maintainer_team(self): # Note: this assumes that the token used by the bot has access to check # team privileges across potentially more than one organization if self._maintainer_team is None: self._maintainer_team = [] teams = C.DEFAULT_GITHUB_MAINTAINERS for team in teams: _org, _team = team.split('/') self._maintainer_team.extend(self.ghw.get_members(_org, _team)) return sorted( set(self._maintainer_team).difference(C.DEFAULT_BOT_NAMES)) @classmethod def create_parser(cls): parser = argparse.ArgumentParser() parser.add_argument( "--botmetafile", type=str, default=None, help="Use this filepath for botmeta instead of from the repo") parser.add_argument("--cachedir", type=str, dest='cachedir_base', default='~/.ansibullbot/cache') parser.add_argument("--daemonize", action="store_true", help="run in a continuos loop") parser.add_argument("--daemonize_interval", type=int, default=(30 * 60), help="seconds to sleep between loop iterations") parser.add_argument("--debug", "-d", action="store_true", help="Debug output") parser.add_argument("--dry-run", "-n", action="store_true", help="Don't make any changes") parser.add_argument( "--dump_actions", action="store_true", help="serialize the actions to disk [/tmp/actions]") parser.add_argument("--force", "-f", action="store_true", help="Do not ask questions") parser.add_argument("--logfile", type=str, default='/var/log/ansibullbot.log', help="Send logging to this file") parser.add_argument("--ignore_state", action="store_true", help="Do not skip processing closed issues") parser.add_argument("--last", type=int, help="triage the last N issues or PRs") parser.add_argument("--only_closed", action="store_true", help="Triage closed issues|prs only") parser.add_argument("--only_issues", action="store_true", help="Triage issues only") parser.add_argument("--only_prs", action="store_true", help="Triage pullrequests only") parser.add_argument("--pause", "-p", action="store_true", dest="always_pause", help="Always pause between prs|issues") parser.add_argument( "--pr", "--id", type=str, help="Triage only the specified pr|issue (separated by commas)") parser.add_argument( "--resume", action="store_true", dest="resume_enabled", help="pickup right after where the bot last stopped") 
parser.add_argument("--repo", "-r", type=str, help="Github repo to triage (defaults to all)") parser.add_argument("--skiprepo", action='append', help="Github repo to skip triaging") parser.add_argument("--start-at", type=int, help="Start triage at the specified pr|issue") parser.add_argument("--sort", default='desc', choices=['asc', 'desc'], help="Direction to sort issues [desc=9-0 asc=0-9]") return parser def set_logger(self): set_logger(debug=self.args.debug, logfile=self.args.logfile) def start(self): if self.args.daemonize: logging.info('starting daemonize loop') self.loop() else: logging.info('starting single run') self.run() logging.info('stopping bot') def loop(self): """Call the run method in a defined interval""" while True: self.run() interval = self.args.daemonize_interval logging.info('sleep %ss (%sm)' % (interval, interval / 60)) time.sleep(interval) @abc.abstractmethod def run(self): pass def render_boilerplate(self, tvars, boilerplate=None): template = environment.get_template('%s.j2' % boilerplate) comment = template.render(**tvars) return comment def apply_actions(self, iw, actions): action_meta = {'REDO': False} if actions.count() > 0: if self.args.dump_actions: self.dump_action_dict(iw, actions.__dict__) if self.args.dry_run: print("Dry-run specified, skipping execution of actions") else: if self.args.force: print("Running actions non-interactive as you forced.") self.execute_actions(iw, actions) return action_meta cont = input("Take recommended actions (y/N/a/R/DEBUG)? ") if cont in ('a', 'A'): sys.exit(0) if cont in ('Y', 'y'): self.execute_actions(iw, actions) if cont in ('r', 'R'): action_meta['REDO'] = True if cont == 'DEBUG': # put the user into a breakpoint to do live debug action_meta['REDO'] = True import epdb epdb.st() elif self.args.always_pause: print("Skipping, but pause.") cont = input("Continue (Y/n/a/R/DEBUG)? 
") if cont in ('a', 'A', 'n', 'N'): sys.exit(0) elif cont in ('r', 'R'): action_meta['REDO'] = True elif cont == 'DEBUG': # put the user into a breakpoint to do live debug import epdb epdb.st() action_meta['REDO'] = True else: print("Skipping.") # let the upper level code redo this issue return action_meta def execute_actions(self, iw, actions): """Turns the actions into API calls""" for commentid in actions.uncomment: iw.remove_comment_by_id(commentid) for comment in actions.comments: logging.info("acton: comment - " + comment) iw.add_comment(comment=comment) if actions.close: for newlabel in actions.newlabel: if newlabel in self.CLOSING_LABELS: logging.info('action: label - ' + newlabel) iw.add_label(label=newlabel) logging.info('action: close') iw.instance.edit(state='closed') else: for unlabel in actions.unlabel: logging.info('action: unlabel - ' + unlabel) iw.remove_label(label=unlabel) for newlabel in actions.newlabel: logging.info('action: label - ' + newlabel) iw.add_label(label=newlabel) if actions.merge: iw.merge() def dump_action_dict(self, issue, actions): """Serialize the action dict to disk for quick(er) debugging""" fn = os.path.join('/tmp', 'actions', issue.repo_full_name, str(issue.number) + '.json') dn = os.path.dirname(fn) if not os.path.isdir(dn): os.makedirs(dn) logging.info(f'dumping {fn}') with open(fn, 'w') as f: f.write(json.dumps(actions, indent=2, sort_keys=True)) def get_resume(self): '''Returns a dict with the last issue repo+number processed''' if self.args.pr or not self.args.resume_enabled: return resume_file = os.path.join(self.cachedir_base, 'resume.json') if not os.path.isfile(resume_file): logging.error('Resume: %r not found', resume_file) return None logging.debug('Resume: read %r', resume_file) with open(resume_file, 'r', encoding='utf-8') as f: data = json.loads(f.read()) return data def set_resume(self, repo, number): if self.args.pr or not self.args.resume_enabled: return data = {'repo': repo, 'number': number} resume_file = os.path.join(self.cachedir_base, 'resume.json') with open(resume_file, 'w', encoding='utf-8') as f: json.dump(data, f) def eval_pr_param(self, pr): '''PR/ID can be a number, numberlist, script, jsonfile, or url''' if isinstance(pr, list): pass elif pr.isdigit(): pr = int(pr) elif pr.startswith('http'): rr = requests.get(pr) numbers = rr.json() pr = numbers[:] elif os.path.isfile(pr) and not os.access(pr, os.X_OK): with open(pr) as f: numbers = json.loads(f.read()) pr = numbers[:] elif os.path.isfile(pr) and os.access(pr, os.X_OK): # allow for scripts when trying to target spec issues logging.info('executing %s' % pr) (rc, so, se) = run_command(pr) numbers = json.loads(to_text(so)) if numbers: if isinstance(numbers[0], dict) and 'number' in numbers[0]: numbers = [x['number'] for x in numbers] else: numbers = [int(x) for x in numbers] logging.info('%s numbers after running script' % len(numbers)) pr = numbers[:] elif ',' in pr: numbers = [int(x) for x in pr.split(',')] pr = numbers[:] if not isinstance(pr, list): pr = [pr] return pr def update_issue_summaries(self, repopath=None, issuenums=None): if issuenums and len(issuenums) <= 10: self.issue_summaries[repopath] = {} for num in issuenums: # --pr is an alias to --id and can also be for issues node = self.gqlc.get_summary(repopath, 'pullRequest', num) if node is None: node = self.gqlc.get_summary(repopath, 'issue', num) if node is not None: self.issue_summaries[repopath][to_text(num)] = node else: self.issue_summaries[repopath] = self.gqlc.get_issue_summaries( repopath) def 
get_stale_numbers(self, reponame): stale = [] for number, summary in self.issue_summaries[reponame].items(): if number in stale: continue if summary['state'] == 'closed': continue number = int(number) mfile = os.path.join(self.cachedir_base, reponame, 'issues', to_text(number), 'meta.json') if not os.path.isfile(mfile): stale.append(number) continue try: with open(mfile, 'rb') as f: meta = json.load(f) except ValueError as e: logging.error('failed to parse %s: %s' % (to_text(mfile), to_text(e))) os.remove(mfile) stale.append(number) continue delta = (datetime.datetime.now() - strip_time_safely(meta['time'])).days if delta > C.DEFAULT_STALE_WINDOW: stale.append(number) stale = sorted({int(x) for x in stale}) if 10 >= len(stale) > 0: logging.info('stale: %s' % ','.join([to_text(x) for x in stale])) return stale @RateLimited def _collect_repo(self, repo, issuenums=None): '''Collect issues for an individual repo''' logging.info('getting repo obj for %s' % repo) if repo not in self.repos: gitrepo = GitRepoWrapper( cachedir=self.cachedir_base, repo=f'https://github.com/{repo}', commit=self.args.ansible_commit, ) self.repos[repo] = { 'repo': self.ghw.get_repo(repo), 'issues': [], 'processed': [], 'since': None, 'stale': [], 'loopcount': 0, 'labels': self.ghw.get_valid_labels(repo), 'gitrepo': gitrepo, } else: # force a clean repo object to limit caching problems logging.info('updating repo') self.repos[repo]['repo'] = self.ghw.get_repo(repo) logging.info('updating checkout') self.repos[repo]['gitrepo'].update() # clear the issues self.repos[repo]['issues'] = {} # increment the loopcount self.repos[repo]['loopcount'] += 1 logging.info('getting issue objs for %s' % repo) self.update_issue_summaries(repopath=repo, issuenums=issuenums) issuecache = {} numbers = self.issue_summaries[repo].keys() numbers = {int(x) for x in numbers} if issuenums: numbers.intersection_update(issuenums) numbers = list(numbers) logging.info('%s known numbers' % len(numbers)) if self.args.daemonize: if not self.repos[repo]['since']: ts = [ x[1]['updated_at'] for x in self.issue_summaries[repo].items() if x[1]['updated_at'] ] ts += [ x[1]['created_at'] for x in self.issue_summaries[repo].items() if x[1]['created_at'] ] ts = sorted(set(ts)) if ts: self.repos[repo]['since'] = ts[-1] else: since = strip_time_safely(self.repos[repo]['since']) api_since = self.repos[repo]['repo'].get_issues(since=since) numbers = [] for x in api_since: numbers.append(x.number) issuecache[x.number] = x numbers = sorted({int(n) for n in numbers}) logging.info('%s numbers after [api] since == %s' % (len(numbers), since)) for k, v in self.issue_summaries[repo].items(): if v['created_at'] is None: # issue is closed and was never processed continue if v['created_at'] > self.repos[repo]['since']: numbers.append(k) numbers = sorted({int(n) for n in numbers}) logging.info('%s numbers after [www] since == %s' % (len(numbers), since)) if self.args.start_at and self.repos[repo]['loopcount'] == 0: numbers = [x for x in numbers if x <= self.args.start_at] logging.info('%s numbers after start-at' % len(numbers)) # Get stale numbers if not targeting if self.args.daemonize and self.repos[repo]['loopcount'] > 0: logging.info('checking for stale numbers') stale = self.get_stale_numbers(repo) self.repos[repo]['stale'] = [int(x) for x in stale] numbers += [int(x) for x in stale] numbers = sorted(set(numbers)) logging.info('%s numbers after stale check' % len(numbers)) ################################################################ # PRE-FILTERING TO PREVENT 
EXCESSIVE API CALLS ################################################################ # filter just the open numbers if not self.args.only_closed and not self.args.ignore_state: numbers = [ x for x in numbers if (to_text(x) in self.issue_summaries[repo] and self.issue_summaries[repo][to_text(x)]['state'] == 'open') ] logging.info('%s numbers after checking state' % len(numbers)) # filter by type if self.args.only_issues: numbers = [ x for x in numbers if self.issue_summaries[repo][to_text(x)]['type'] == 'issue' ] logging.info('%s numbers after checking type' % len(numbers)) elif self.args.only_prs: numbers = [ x for x in numbers if self.issue_summaries[repo][to_text(x)] ['type'] == 'pullrequest' ] logging.info('%s numbers after checking type' % len(numbers)) numbers = sorted({int(x) for x in numbers}) if self.args.sort == 'desc': numbers = [x for x in reversed(numbers)] if self.args.last and len(numbers) > self.args.last: numbers = numbers[0 - self.args.last:] # Use iterator to avoid requesting all issues upfront self.repos[repo]['issues'] = RepoIssuesIterator( self.repos[repo]['repo'], numbers, issuecache=issuecache) logging.info('getting repo objs for %s complete' % repo) def collect_repos(self): '''Populate the local cache of repos''' logging.info('start collecting repos') for repo in C.DEFAULT_GITHUB_REPOS: # skip repos based on args if self.args.repo and self.args.repo != repo: continue if self.args.skiprepo: if repo in self.args.skiprepo: continue if self.args.pr: numbers = self.eval_pr_param(self.args.pr) self._collect_repo(repo, issuenums=numbers) else: self._collect_repo(repo) logging.info('finished collecting issues')
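# A hedged usage sketch: constructing a triager with an argv list works because
# __init__ above feeds `args` straight to parser.parse_args(), and the flags map
# to create_parser(). The repo name is illustrative. Note that _collect_repo()
# also reads self.args.ansible_commit, so a concrete subclass would need to add
# that option to its own parser before calling collect_repos().
class ExampleDaemon(DefaultTriager):

    def run(self):
        self.collect_repos()          # gather issue iterators as shown above
        # ... iterate self.repos and apply actions per issue ...


if __name__ == '__main__':
    ExampleDaemon(args=['--repo', 'ansible/ansible', '--dry-run', '--debug']).start()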