예제 #1
0
    def setUp(self):
        """Build 'self.iw' for issue 1 of 'test/test' on top of mock objects.

        The cache directory is removed and recreated first, so nothing
        written by an earlier run is still on disk.
        """
        cachedir = '/tmp/testcache'
        if os.path.isdir(cachedir):
            shutil.rmtree(cachedir)
        os.makedirs(cachedir)

        github = GithubMock()
        wrapper = GithubWrapper(github, cachedir=cachedir)
        repo = wrapper.get_repo('test/test')

        # FIXME - this should return a wrapped issue
        issue = repo.get_issue(1)

        self.iw = IssueWrapper(
            github=github,
            repo=repo,
            issue=issue,
            cachedir=cachedir
        )
        self.iw.gitrepo = GitRepoWrapperMock()
예제 #2
0
    def setUp(self):
        """Build 'self.iw' for issue 1 of 'test/test' on top of mock objects.

        The cache directory is removed and recreated first, so nothing
        written by an earlier run is still on disk.
        """
        cachedir = '/tmp/testcache'
        if os.path.isdir(cachedir):
            shutil.rmtree(cachedir)
        os.makedirs(cachedir)

        github = GithubMock()
        wrapper = GithubWrapper(github, cachedir=cachedir)
        repo = wrapper.get_repo('test/test', verbose=False)

        # FIXME - this should return a wrapped issue
        issue = repo.get_issue(1)

        self.iw = IssueWrapper(
            github=github,
            repo=repo,
            issue=issue,
            cachedir=cachedir
        )
        self.iw.file_indexer = FileIndexerMock()
예제 #3
0
class DefaultTriager:
    """
    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done updating 'actions' instance
        3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    ITERATION = 0
    debug = False
    cachedir_base = None
    BOTNAMES = C.DEFAULT_BOT_NAMES
    CLOSING_LABELS = []

    def __init__(self, args=None):
        pass

    @classmethod
    def create_parser(cls):
        """Creates an argument parser

        Returns:
            A argparse.ArgumentParser object
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--verbose",
                            "-v",
                            action="store_true",
                            help="Verbose output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument("--force_rate_limit",
                            action="store_true",
                            help="debug: force the rate limit")
        # useful for debugging
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        return parser

    def set_logger(self):
        """Configure logging from this instance's 'debug' and 'logfile'."""
        # inside the method body 'set_logger' resolves to the module-level
        # function of the same name, not to this method
        set_logger(logfile=self.logfile, debug=self.debug)

    def start(self):

        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if self.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API

        The token is used when 'github_token' is set; otherwise the
        user/password pair is used.
        """
        if not self.github_token:
            return Github(base_url=self.github_url,
                          login_or_token=self.github_user,
                          password=self.github_pass)

        return Github(base_url=self.github_url,
                      login_or_token=self.github_token)

    def is_pr(self, issue):
        """Tell whether 'issue' is a pull request.

        Args:
            issue: object exposing an 'html_url' string

        Returns:
            True when the url contains '/pull/', False otherwise
        """
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        The logins are cached as '[timestamp, logins]' in
        '<cachedir_base>/<organization>/members.pickle'. They are fetched
        again when that file is missing or when its timestamp is older than
        the organization's 'updated_at'; every fetch rewrites the cache so a
        stale file does not force a refetch on each later call.

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            # NOTE(review): pickle.load executes whatever the file contains;
            # this is only acceptable while the cache directory is trusted
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            if not (mdata[0] < gh_org.updated_at):
                # cache is still current
                return mdata[1]

        # cache missing or stale: fetch and persist the fresh list
        members = [x.login for x in gh_org.get_members()]
        with open(cachefile, 'wb') as f:
            pickle.dump([now, members], f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Collect the logins of everyone in the given teams

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A sorted list of GitHub login belonging to teams
        """
        gh_org = self._connect().get_organization(organization)

        # a set, because one user may belong to several of the teams
        logins = {
            member.login
            for team in gh_org.get_teams()
            if team.name in teams
            for member in team.get_members()
        }
        return sorted(logins)

    #@RateLimited
    def get_valid_labels(self, repo):
        """Return the names of the labels exposed by the wrapper of 'repo'.

        A connection and wrapper are created first when 'self.ghw' is unset.
        """
        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        repo_wrapper = self.ghw.get_repo(repo)
        return [label.name for label in repo_wrapper.labels]

    def loop(self):
        '''Call run() forever, sleeping 'daemonize_interval' seconds between calls'''
        while True:
            self.run()
            self.ITERATION += 1

            pause = self.daemonize_interval
            minutes = pause / 60
            logging.info('sleep %ss (%sm)' % (pause, minutes))
            time.sleep(pause)

    @abc.abstractmethod
    def run(self):
        """Do one triage pass; this base implementation does nothing."""

    def get_current_time(self):
        """Return the current UTC time as produced by datetime.utcnow()."""
        utc_now = datetime.utcnow()
        return utc_now

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' template with 'tvars' as variables.

        Args:
            tvars: mapping expanded into keyword arguments of render()
            boilerplate: template name without the '.j2' suffix

        Returns:
            The rendered text
        """
        template_name = '%s.j2' % boilerplate
        return environment.get_template(template_name).render(**tvars)

    def apply_actions(self, iw, actions):
        action_meta = {'REDO': False}

        if actions.count() > 0:
            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = input("Take recommended actions (y/N/a/R/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls

        Comment removals and new comments are always applied. When the issue
        is being closed, only new labels listed in CLOSING_LABELS are added
        before closing; otherwise labels, assignments and a merge are applied.
        """
        for comment_id in actions.uncomment:
            iw.remove_comment_by_id(comment_id)

        for text in actions.comments:
            logging.info("acton: comment - " + text)
            iw.add_comment(comment=text)

        if not actions.close:
            for label in actions.unlabel:
                logging.info('action: unlabel - ' + label)
                iw.remove_label(label=label)
            for label in actions.newlabel:
                logging.info('action: label - ' + label)
                iw.add_label(label=label)

            for login in actions.assign:
                logging.info('action: assign - ' + login)
                iw.assign_user(login)

            for login in actions.unassign:
                logging.info('action: unassign - ' + login)
                iw.unassign_user(login)

            if actions.merge:
                iw.merge()
        else:
            for label in actions.newlabel:
                if label not in self.CLOSING_LABELS:
                    continue
                logging.info('action: label - ' + label)
                iw.add_label(label=label)

            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')

        # FIXME why?
        self.build_history(iw)

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Check if a PR# has been merged or not

        Returns the pull request's 'merged' attribute, or False when the
        pull request or that attribute cannot be obtained. Raises Exception
        when 'number' is None.
        '''
        if number is None:
            raise Exception('Can not check merge state on the number: None')

        try:
            pull = repo.get_pullrequest(number)
        except Exception as e:
            print(e)
            pull = None

        if not pull:
            return False

        try:
            return pull.merged
        except Exception as e:
            logging.debug(e)
            return False

    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit

        Never returns: it keeps listing the issues of 'self.repo', reads each
        issue's history and prints the rate limit after every step.
        '''
        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            repo_wrapper = self.ghw.get_repo(self.repo, verbose=False)
            all_issues = repo_wrapper.repo.get_issues()
            pprint(repo_wrapper.get_rate_limit())

            for issue in all_issues:
                wrapped = IssueWrapper(github=self.ghw,
                                       repo=repo_wrapper,
                                       issue=issue,
                                       cachedir=cachedir)
                # evaluated only for its effect; presumably loading the
                # history is what issues API calls - TODO confirm
                wrapped.history
                pprint(repo_wrapper.get_rate_limit())

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging

        The file is written to '/tmp/actions/<repo_full_name>/<number>.json';
        missing directories are created.

        Args:
            issue: object exposing 'repo_full_name' and 'number'
            actions: JSON-serializable data to dump
        '''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          to_text(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info(f'dumping {fn}')
        # json.dumps() returns str, so the file has to be opened in text
        # mode; writing it to a binary handle raises TypeError
        with open(fn, 'w', encoding='utf-8') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
예제 #4
0
class DefaultTriager(object):

    ITERATION = 0

    def __init__(self):
        """Parse the command line, set up logging and connect to GitHub.

        Every parsed option becomes an instance attribute of the same name.
        """
        cli_args = self.create_parser().parse_args()
        for name, value in vars(cli_args).items():
            setattr(self, name, value)

        self.last_run = None

        self.github_user = C.DEFAULT_GITHUB_USERNAME
        self.github_pass = C.DEFAULT_GITHUB_PASSWORD
        self.github_token = C.DEFAULT_GITHUB_TOKEN

        # where to store junk
        self.cachedir_base = os.path.expanduser(self.cachedir_base)

        self.set_logger()
        logging.info('starting bot')

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base)

    @classmethod
    def create_parser(cls):
        """Creates an argument parser

        Returns:
            A argparse.ArgumentParser object
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--verbose",
                            "-v",
                            action="store_true",
                            help="Verbose output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument("--force_rate_limit",
                            action="store_true",
                            help="debug: force the rate limit")
        parser.add_argument("--force_description_fixer",
                            action="store_true",
                            help="Always invoke the description fixer")
        # useful for debugging
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        return parser

    def set_logger(self):
        if self.debug:
            logging.level = logging.DEBUG
        else:
            logging.level = logging.INFO
        logFormatter = \
            logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        rootLogger = logging.getLogger()
        if self.debug:
            rootLogger.setLevel(logging.DEBUG)
        else:
            rootLogger.setLevel(logging.INFO)

        logdir = os.path.dirname(self.logfile)
        if logdir and not os.path.isdir(logdir):
            os.makedirs(logdir)

        fileHandler = WatchedFileHandler(self.logfile)
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)

    def start(self):

        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if self.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API

        The token is used when 'github_token' is set; otherwise the
        user/password pair is used.
        """
        if not self.github_token:
            return Github(login_or_token=self.github_user,
                          password=self.github_pass)

        return Github(login_or_token=self.github_token)

    def is_pr(self, issue):
        """Tell whether 'issue' is a pull request.

        Args:
            issue: object exposing an 'html_url' string

        Returns:
            True when the url contains '/pull/', False otherwise
        """
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        The logins are cached as '[timestamp, logins]' in
        '<cachedir_base>/<organization>/members.pickle'. They are fetched
        again when that file is missing or when its timestamp is older than
        the organization's 'updated_at'; every fetch rewrites the cache so a
        stale file does not force a refetch on each later call.

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            # NOTE(review): pickle.load executes whatever the file contains;
            # this is only acceptable while the cache directory is trusted
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            if not (mdata[0] < gh_org.updated_at):
                # cache is still current
                return mdata[1]

        # cache missing or stale: fetch and persist the fresh list
        members = [x.login for x in gh_org.get_members()]
        with open(cachefile, 'wb') as f:
            pickle.dump([now, members], f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Collect the logins of everyone in the given teams

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A sorted list of GitHub login belonging to teams
        """
        gh_org = self._connect().get_organization(organization)

        # a set, because one user may belong to several of the teams
        logins = {
            member.login
            for team in gh_org.get_teams()
            if team.name in teams
            for member in team.get_members()
        }
        return sorted(logins)

    #@RateLimited
    def get_valid_labels(self, repo):
        """Return the names of the labels returned by the wrapper of 'repo'.

        A connection and wrapper are created first when 'self.ghw' is unset.
        """
        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        repo_wrapper = self.ghw.get_repo(repo)
        return [label.name for label in repo_wrapper.get_labels()]

    def loop(self):
        '''Call run() forever, sleeping 'daemonize_interval' seconds between calls'''
        while True:
            self.run()
            self.ITERATION += 1

            pause = self.daemonize_interval
            minutes = pause / 60
            logging.info('sleep %ss (%sm)' % (pause, minutes))
            time.sleep(pause)

    @abc.abstractmethod
    def run(self):
        """Do one triage pass; this base implementation does nothing."""

    def get_current_time(self):
        """Return the current UTC time as produced by datetime.utcnow()."""
        utc_now = datetime.utcnow()
        return utc_now

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' template with 'tvars' as variables.

        Args:
            tvars: mapping expanded into keyword arguments of render()
            boilerplate: template name without the '.j2' suffix

        Returns:
            The rendered text
        """
        template_name = '%s.j2' % boilerplate
        return environment.get_template(template_name).render(**tvars)

    def apply_actions(self, iw, actions):

        action_meta = {'REDO': False}

        if actions.count() > 0:

            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = raw_input(
                    "Take recommended actions (y/N/a/R/T/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont == 'T':
                    self.template_wizard(iw)
                    action_meta['REDO'] = True
                if cont == 'r' or cont == 'R':
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = raw_input("Continue (Y/n/a/R/T/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            if cont == 'T':
                self.template_wizard(iw)
                action_meta['REDO'] = True
            elif cont == 'REDO':
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        elif self.force_description_fixer:
            # FIXME: self.FIXED_ISSUES not defined since 1cf9674cd38edbd17aff906d72296c99043e5c13
            #        either define self.FIXED_ISSUES, either remove this method
            # FIXME force_description_fixer is not known by DefaultTriager (only
            #       by AnsibleTriage): if not removed, move it to AnsibleTriage
            if iw.html_url not in self.FIXED_ISSUES:
                if self.meta['template_missing_sections']:
                    changed = self.template_wizard(iw)
                    if changed:
                        action_meta['REDO'] = True
                self.FIXED_ISSUES.append(iw.html_url)
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def template_wizard(self, iw):
        """Show the current and the proposed description side by side.

        The operator is asked whether to apply the proposed description; it
        is applied only on the exact answer 'Y'.

        Returns:
            True when the description was changed, False otherwise
        """
        fixer = DescriptionFixer(iw, self.meta)

        old_lines = iw.body.split('\n')
        new_lines = fixer.new_description.split('\n')

        # pad the shorter column with empty lines so both are equally tall
        total_lines = max(len(old_lines), len(new_lines))
        new_lines.extend([''] * (total_lines - len(new_lines)))
        old_lines.extend([''] * (total_lines - len(old_lines)))

        separator = '--------------------------------------------------------'
        width = 100
        print("%s|%s" % (separator.ljust(width), separator))
        for left, right in zip(old_lines, new_lines):
            # over-long lines are cut so the left column keeps its width
            if len(left) > width:
                left = left[:width - 4]
            if len(right) > width:
                right = right[:width - 4]
            print("%s|%s" % (left.rstrip().ljust(width), right.rstrip()))
        print("%s|%s" % (separator.rstrip().ljust(width), separator))

        print('# ' + iw.html_url)
        answer = raw_input("Apply this new description? (Y/N) ")
        if answer != 'Y':
            return False

        iw.set_description(fixer.new_description)
        return True

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls

        Comments are always posted. When the issue is being closed nothing
        else is applied; otherwise labels, assignments and a merge follow.
        """
        for text in actions.comments:
            logging.info("acton: comment - " + text)
            iw.add_comment(comment=text)

        if actions.close:
            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
        else:
            for label in actions.unlabel:
                logging.info('action: unlabel - ' + label)
                iw.remove_label(label=label)
            for label in actions.newlabel:
                logging.info('action: label - ' + label)
                iw.add_label(label=label)

            for login in actions.assign:
                logging.info('action: assign - ' + login)
                iw.assign_user(login)

            for login in actions.unassign:
                logging.info('action: unassign - ' + login)
                iw.unassign_user(login)

            if actions.merge:
                iw.merge()

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Check if a PR# has been merged or not

        Args:
            number: pull request number; must not be None
            repo: object exposing get_pullrequest(number)

        Returns:
            The pull request's 'merged' attribute, or False when the pull
            request or that attribute cannot be obtained

        Raises:
            Exception: when 'number' is None
        '''
        # no interactive debugger breakpoints here: they would block an
        # unattended run and fail outright where epdb is not installed
        if number is None:
            raise Exception('Can not check merge state on the number: None')

        merged = False
        pr = None
        try:
            pr = repo.get_pullrequest(number)
        except Exception as e:
            print(e)
        if pr:
            try:
                merged = pr.merged
            except Exception as e:
                logging.debug(e)
        return merged

    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit

        Never returns: it keeps listing the issues of 'self.repo', reads each
        issue's history and prints the rate limit after every step.
        '''
        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            repo_wrapper = self.ghw.get_repo(self.repo, verbose=False)
            all_issues = repo_wrapper.repo.get_issues()
            pprint(repo_wrapper.get_rate_limit())

            for issue in all_issues:
                wrapped = IssueWrapper(github=self.ghw,
                                       repo=repo_wrapper,
                                       issue=issue,
                                       cachedir=cachedir)
                # evaluated only for its effect; presumably loading the
                # history is what issues API calls - TODO confirm
                wrapped.history
                pprint(repo_wrapper.get_rate_limit())

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging

        The file is written to '/tmp/actions/<repo_full_name>/<number>.json';
        missing directories are created.

        Args:
            issue: object exposing 'repo_full_name' and 'number'
            actions: JSON-serializable data to dump
        '''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info('dumping {}'.format(fn))
        # text mode: json.dumps() yields text, which a binary handle only
        # accepts on Python 2; 'w' works on both Python 2 and 3
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
예제 #5
0
class DefaultTriager(object):

    ITERATION = 0

    def __init__(self, args):
        """Configure the bot from parsed arguments and connect to GitHub.

        Args:
            args: parsed command line arguments. ``configfile``, ``repo``,
                ``logfile`` and ``cachedir`` are read directly; every
                attribute found on it is also copied onto the instance.
        """
        self.args = args
        self.last_run = None
        self.daemonize = None
        self.daemonize_interval = None
        self.dry_run = False
        self.force = False
        # apply_actions reads this attribute, so it needs a default even
        # when no --pause style option was given
        self.always_pause = False

        self.configfile = self.args.configfile
        self.config = ConfigParser.ConfigParser()
        self.config.read([self.configfile])

        # credentials are optional: a missing section/option leaves them None
        credentials = (
            ('github_user', 'github_username'),
            ('github_pass', 'github_password'),
            ('github_token', 'github_token'),
        )
        for attribute, option in credentials:
            try:
                value = self.config.get('defaults', option)
            except ConfigParser.Error:
                value = None
            setattr(self, attribute, value)

        self.repopath = self.args.repo
        self.logfile = self.args.logfile

        # where to store junk
        self.cachedir = self.args.cachedir
        self.cachedir = os.path.expanduser(self.cachedir)
        self.cachedir_base = self.cachedir

        self.set_logger()
        logging.info('starting bot')

        logging.debug('setting bot attributes')
        for x in vars(self.args):
            val = getattr(self.args, x)
            setattr(self, x, val)

        # the copy above resets self.cachedir to the raw argument value;
        # restore the user-expanded path computed earlier
        self.cachedir = self.cachedir_base

        if hasattr(self.args, 'pause') and self.args.pause:
            self.always_pause = True

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir)

        # get valid labels
        logging.info('getting labels')
        self.valid_labels = self.get_valid_labels(self.repopath)

    @property
    def resume(self):
        '''Load the last processed issue (repo+number) when resuming.

        Returns:
            The decoded content of ``resume.json``, or None when there are
            no args, a specific pr was requested, resuming is not enabled,
            or the file does not exist.
        '''
        if not hasattr(self, 'args'):
            return None

        cli = self.args
        if getattr(cli, 'pr', None) or not getattr(cli, 'resume', None):
            return None

        # prefer the base cache directory when the instance has one
        if hasattr(self, 'cachedir_base'):
            base = self.cachedir_base
        else:
            base = self.cachedir

        path = os.path.join(base, 'resume.json')
        if not os.path.isfile(path):
            return None

        with open(path, 'rb') as handle:
            return json.loads(handle.read())

    def set_resume(self, repo, number):
        '''Record the last processed issue so a later run can resume.

        Nothing is written when there are no args, a specific pr was
        requested, or resuming is not enabled.

        Args:
            repo: repository identifier stored under the 'repo' key.
            number: issue number stored under the 'number' key.
        '''
        if not hasattr(self, 'args'):
            return None
        if hasattr(self.args, 'pr') and self.args.pr:
            return None
        if not hasattr(self.args, 'resume'):
            return None
        if not self.args.resume:
            return None

        data = {'repo': repo, 'number': number}
        if hasattr(self, 'cachedir_base'):
            resume_file = os.path.join(self.cachedir_base, 'resume.json')
        else:
            resume_file = os.path.join(self.cachedir, 'resume.json')
        # json.dumps returns text; a binary handle rejects it on Python 3
        with open(resume_file, 'w') as f:
            f.write(json.dumps(data, indent=2))

    def set_logger(self):
        """Configure the root logger with a file and a console handler.

        The level is DEBUG when ``args.debug`` is set, INFO otherwise. The
        log file comes from ``args.logfile`` and falls back to
        /tmp/ansibullbot.log; its parent directory is created if needed.
        """
        debug_on = hasattr(self.args, 'debug') and self.args.debug
        level = logging.DEBUG if debug_on else logging.INFO
        logging.level = level

        formatter = logging.Formatter(
            "%(asctime)s %(levelname)s %(message)s")
        root = logging.getLogger()
        root.setLevel(level)

        if hasattr(self.args, 'logfile'):
            target = self.args.logfile
        else:
            target = '/tmp/ansibullbot.log'

        parent = os.path.dirname(target)
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

        # file handler first, console handler second
        for handler in (logging.FileHandler(target), logging.StreamHandler()):
            handler.setFormatter(formatter)
            root.addHandler(handler)

    def start(self):
        """Entry point: run once, loop as a daemon, or burn the rate limit.

        ``args.force_rate_limit`` takes precedence and returns right after
        ``trigger_rate_limit``; otherwise ``args.daemonize`` selects between
        ``loop`` and a single ``run``.
        """
        options = self.args

        if getattr(options, 'force_rate_limit', False):
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if not getattr(options, 'daemonize', False):
            logging.info('starting single run')
            self.run()
        else:
            logging.info('starting daemonize loop')
            self.loop()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API

        Uses the token when one is configured, otherwise the user name and
        password.
        """
        if not self.github_token:
            return Github(login_or_token=self.github_user,
                          password=self.github_pass)
        return Github(login_or_token=self.github_token)

    @abc.abstractmethod
    def _get_repo_path(self):
        pass

    def is_pr(self, issue):
        """Return True when ``issue.html_url`` contains '/pull/'."""
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        """Return the negation of ``is_pr`` for the same object."""
        looks_like_pr = self.is_pr(issue)
        return not looks_like_pr

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        The member list is cached in ``members.pickle`` next to the issue
        cache, together with the time it was fetched. The cache is reused
        until the organization reports a newer ``updated_at``.

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = self.cachedir
        if cachedir.endswith('/issues'):
            cachedir = os.path.dirname(cachedir)
        cachefile = os.path.join(cachedir, 'members.pickle')

        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        members = []
        stale = True
        if os.path.isfile(cachefile):
            # NOTE(review): pickle must only ever read files written by the
            # bot itself; never point the cache at untrusted data
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            members = mdata[1]
            stale = mdata[0] < gh_org.updated_at

        if stale:
            members = [x.login for x in gh_org.get_members()]
            # persist every refresh: previously a stale cache was refetched
            # but never rewritten, so it stayed stale forever
            with open(cachefile, 'wb') as f:
                pickle.dump([now, members], f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Collect the logins of everyone in the given teams.

        Args:
            organization: name of the teams' organization
            teams: names of the teams that compose the project core team

        Returns:
            A sorted list of unique GitHub logins belonging to those teams
        """
        org = self._connect().get_organization(organization)
        logins = {
            person.login
            for group in org.get_teams()
            if group.name in teams
            for person in group.get_members()
        }
        return sorted(logins)

    #@RateLimited
    def get_valid_labels(self, repo=None):
        """Return the names of the labels defined on a repository.

        Args:
            repo: repository path. When omitted, the path comes from
                ``_get_repo_path()`` and the wrapped repo is also stored
                on ``self.repo`` (old workflow).

        Returns:
            A list of label names.
        """
        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        if repo:
            # v3 workflow
            label_source = self.ghw.get_repo(repo)
        else:
            # OLD workflow
            self.repo = self.ghw.get_repo(self._get_repo_path())
            label_source = self.repo

        return [label.name for label in label_source.get_labels()]

    def debug(self, msg=""):
        """Print ``msg`` with a 'Debug: ' prefix when ``self.verbose`` is set"""
        if not self.verbose:
            return
        print("Debug: " + msg)

    def loop(self):
        '''Run forever: call ``run``, count the iteration, then sleep for
        ``args.daemonize_interval`` seconds'''
        while True:
            self.run()
            self.ITERATION = self.ITERATION + 1
            pause = self.args.daemonize_interval
            minutes = pause / 60
            logging.info('sleep %ss (%sm)' % (pause, minutes))
            time.sleep(pause)

    @abc.abstractmethod
    def run(self):
        """Placeholder for one triage pass; this base version returns None."""
        return None

    def get_current_time(self):
        """Return the value of ``datetime.utcnow()``."""
        moment = datetime.utcnow()
        return moment

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' template with ``tvars`` as keywords.

        Args:
            tvars: mapping passed to the template as keyword arguments.
            boilerplate: template name without the '.j2' extension.

        Returns:
            The rendered text.
        """
        return environment.get_template('%s.j2' % boilerplate).render(**tvars)

    def check_safe_match(self, iw, actions):
        """ Turn force on or off depending on match characteristics

        ``self.force`` becomes True when there is nothing to do, when the
        only change is a single 'affects_' label, when a closing/unlabeling
        action targets an issue whose title mentions ``self.module``, or
        when the only action is one 'still waiting' comment.
        """
        if actions.count() == 0:
            is_safe = True
        elif not actions.close and not actions.unlabel:
            is_safe = (len(actions.newlabel) == 1 and
                       actions.newlabel[0].startswith('affects_'))
        else:
            is_safe = (bool(self.module) and
                       self.module in iw.instance.title.lower())

        # be more lenient on re-notifications
        changes_state = actions.close or actions.unlabel or actions.newlabel
        if not is_safe and not changes_state:
            if len(actions.comments) == 1 and \
                    'still waiting' in actions.comments[0]:
                is_safe = True

        self.force = is_safe

    def apply_actions(self, iw, actions):
        """Decide, possibly interactively, whether to execute the actions.

        Args:
            iw: the wrapped issue the actions apply to.
            actions: the actions computed for that issue.

        Returns:
            A dict whose 'REDO' key is True when the caller should process
            the same issue again.
        """
        action_meta = {'REDO': False}

        if hasattr(self, 'safe_force') and self.safe_force:
            self.check_safe_match(iw, actions)

        if actions.count() > 0:

            if hasattr(self, 'args'):
                if hasattr(self.args, 'dump_actions'):
                    if self.args.dump_actions:
                        self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = raw_input(
                    "Take recommended actions (y/N/a/R/T/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont == 'T':
                    self.template_wizard(iw)
                    action_meta['REDO'] = True
                if cont == 'r' or cont == 'R':
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif getattr(self, 'always_pause', False):
            # getattr: the attribute is only set when pausing was requested
            print("Skipping, but pause.")
            cont = raw_input("Continue (Y/n/a/R/T/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            if cont == 'T':
                self.template_wizard(iw)
                action_meta['REDO'] = True
            elif cont in ('r', 'R', 'REDO'):
                # the prompt advertises 'R'; 'REDO' is kept for
                # backward compatibility
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        elif hasattr(self, 'force_description_fixer'
                     ) and self.args.force_description_fixer:
            if iw.html_url not in self.FIXED_ISSUES:
                if self.meta['template_missing_sections']:
                    changed = self.template_wizard(iw)
                    if changed:
                        action_meta['REDO'] = True
                self.FIXED_ISSUES.append(iw.html_url)
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def template_wizard(self, iw):
        """Show the current and the proposed description side by side and
        ask whether to apply the proposal.

        Returns:
            True when the user answered 'Y' and the description was set,
            False otherwise.
        """
        fixer = DescriptionFixer(iw, self.meta)

        left = iw.body.split('\n')
        right = fixer.new_description.split('\n')

        # pad the shorter column so both have the same number of rows
        rows = max(len(left), len(right))
        left.extend([''] * (rows - len(left)))
        right.extend([''] * (rows - len(right)))

        rule = '--------------------------------------------------------'
        width = 100
        print("%s|%s" % (rule.ljust(width), rule))
        for before, after in zip(left, right):
            # over-long cells are cut a little short of the column width
            if len(before) > width:
                before = before[:width - 4]
            if len(after) > width:
                after = after[:width - 4]
            print("%s|%s" % (before.rstrip().ljust(width), after.rstrip()))
        print("%s|%s" % (rule.rstrip().ljust(width), rule))

        print('# ' + iw.html_url)
        answer = raw_input("Apply this new description? (Y/N) ")
        if answer != 'Y':
            return False
        iw.set_description(fixer.new_description)
        return True

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls

        Comments are posted first. When the issue is to be closed, it is
        closed and nothing else is applied; otherwise labels, assignments
        and the merge are applied in that order.

        Args:
            iw: the wrapped issue to act on.
            actions: object providing ``comments``, ``close``, ``unlabel``,
                ``newlabel``, ``assign``, ``unassign`` and ``merge``.
        """

        for comment in actions.comments:
            logging.info("action: comment - " + comment)
            iw.add_comment(comment=comment)
        if actions.close:
            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
            return

        for unlabel in actions.unlabel:
            logging.info('action: unlabel - ' + unlabel)
            iw.remove_label(label=unlabel)
        for newlabel in actions.newlabel:
            logging.info('action: label - ' + newlabel)
            iw.add_label(label=newlabel)

        for user in actions.assign:
            logging.info('action: assign - ' + user)
            iw.assign_user(user)

        for user in actions.unassign:
            logging.info('action: unassign - ' + user)
            iw.unassign_user(user)

        if actions.merge:
            iw.merge()

    @RateLimited
    def is_pr_merged(self, number, repo=None):
        '''Report whether pull request ``number`` has been merged.

        Args:
            number: pull request number.
            repo: object exposing ``get_pullrequest``; ``self.repo`` is
                used when it is omitted.

        Returns:
            The pull request's ``merged`` value, or False when the pull
            request could not be fetched (the error is printed).
        '''
        pull = None
        try:
            # the lookup of self.repo stays inside the try on purpose
            source = repo if repo else self.repo
            pull = source.get_pullrequest(number)
        except Exception as e:
            print(e)

        if not pull:
            return False
        return pull.merged

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging.

        The data is written to /tmp/actions/<repo_full_name>/<number>.json;
        missing directories are created.

        Args:
            issue: object providing ``repo_full_name`` and ``number``.
            actions: JSON-serializable description of the actions.
        '''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info('dumping {}'.format(fn))
        # text mode: json.dumps returns text, which a binary handle
        # rejects with TypeError on Python 3
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
예제 #6
0
class DefaultTriager(object):
    """
    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done updating 'actions' instance
        3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    ITERATION = 0

    def __init__(self):
        """Parse the command line, set up logging and connect to GitHub.

        Every parsed option becomes an instance attribute; the GitHub
        settings come from the ``C`` constants.
        """
        cli_args = self.create_parser().parse_args()

        # expose every parsed option as an instance attribute
        for name, value in vars(cli_args).items():
            setattr(self, name, value)

        self.last_run = None

        self.github_url = C.DEFAULT_GITHUB_URL
        self.github_user = C.DEFAULT_GITHUB_USERNAME
        self.github_pass = C.DEFAULT_GITHUB_PASSWORD
        self.github_token = C.DEFAULT_GITHUB_TOKEN

        # where to store junk
        expanded_cache = os.path.expanduser(self.cachedir_base)
        self.cachedir_base = expanded_cache

        self.set_logger()
        logging.info('starting bot')

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base)

    @classmethod
    def create_parser(cls):
        """Build the command line parser shared by all triagers.

        Returns:
            A argparse.ArgumentParser object
        """
        # (option strings, add_argument keywords), in registration order
        option_table = [
            (("--cachedir",),
             dict(type=str, dest='cachedir_base',
                  default='~/.ansibullbot/cache')),
            (("--logfile",),
             dict(type=str, default='/var/log/ansibullbot.log',
                  help="Send logging to this file")),
            (("--daemonize",),
             dict(action="store_true", help="run in a continuos loop")),
            (("--daemonize_interval",),
             dict(type=int, default=(30 * 60),
                  help="seconds to sleep between loop iterations")),
            (("--debug", "-d"),
             dict(action="store_true", help="Debug output")),
            (("--verbose", "-v"),
             dict(action="store_true", help="Verbose output")),
            (("--dry-run", "-n"),
             dict(action="store_true", help="Don't make any changes")),
            (("--force", "-f"),
             dict(action="store_true", help="Do not ask questions")),
            (("--pause", "-p"),
             dict(action="store_true", dest="always_pause",
                  help="Always pause between prs|issues")),
            (("--force_rate_limit",),
             dict(action="store_true", help="debug: force the rate limit")),
            (("--force_description_fixer",),
             dict(action="store_true",
                  help="Always invoke the description fixer")),
            # useful for debugging
            (("--dump_actions",),
             dict(action="store_true",
                  help="serialize the actions to disk [/tmp/actions]")),
            (("--botmetafile",),
             dict(type=str, default=None,
                  help="Use this filepath for botmeta instead of from the repo")),
        ]

        parser = argparse.ArgumentParser()
        for flags, keywords in option_table:
            parser.add_argument(*flags, **keywords)
        return parser

    def set_logger(self):
        """Configure the root logger with a watched file handler and a
        console handler.

        The level is DEBUG when ``self.debug`` is set, INFO otherwise; the
        parent directory of ``self.logfile`` is created if needed.
        """
        level = logging.DEBUG if self.debug else logging.INFO
        logging.level = level

        formatter = logging.Formatter(
            "%(asctime)s %(levelname)s %(message)s")
        root = logging.getLogger()
        root.setLevel(level)

        parent = os.path.dirname(self.logfile)
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

        # file handler first, console handler second
        handlers = (WatchedFileHandler(self.logfile), logging.StreamHandler())
        for handler in handlers:
            handler.setFormatter(formatter)
            root.addHandler(handler)

    def start(self):
        """Entry point: run once, loop as a daemon, or burn the rate limit.

        ``self.force_rate_limit`` takes precedence and returns right after
        ``trigger_rate_limit``; otherwise ``self.daemonize`` selects between
        ``loop`` and a single ``run``.
        """
        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if not self.daemonize:
            logging.info('starting single run')
            self.run()
        else:
            logging.info('starting daemonize loop')
            self.loop()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API

        Uses the token when one is configured, otherwise the user name and
        password; ``self.github_url`` is the base url in both cases.
        """
        if not self.github_token:
            return Github(
                base_url=self.github_url,
                login_or_token=self.github_user,
                password=self.github_pass
            )
        return Github(base_url=self.github_url, login_or_token=self.github_token)

    def is_pr(self, issue):
        """Return True when ``issue.html_url`` contains '/pull/'."""
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        """Return the negation of ``is_pr`` for the same object."""
        looks_like_pr = self.is_pr(issue)
        return not looks_like_pr

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        The member list is cached in ``<cachedir_base>/<organization>/
        members.pickle`` together with the time it was fetched. The cache is
        reused until the organization reports a newer ``updated_at``.

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        members = []
        stale = True
        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle_load(f)
            members = mdata[1]
            stale = mdata[0] < gh_org.updated_at

        if stale:
            members = [x.login for x in gh_org.get_members()]
            # persist every refresh: previously a stale cache was refetched
            # but never rewritten, so it stayed stale forever
            with open(cachefile, 'wb') as f:
                pickle_dump([now, members], f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Collect the logins of everyone in the given teams.

        Args:
            organization: name of the teams' organization
            teams: names of the teams that compose the project core team

        Returns:
            A sorted list of unique GitHub logins belonging to those teams
        """
        org = self._connect().get_organization(organization)
        logins = {
            person.login
            for group in org.get_teams()
            if group.name in teams
            for person in group.get_members()
        }
        return sorted(logins)

    #@RateLimited
    def get_valid_labels(self, repo):
        """Return the names of the labels defined on ``repo``.

        Args:
            repo: repository path handed to the wrapper's ``get_repo``.

        Returns:
            A list of label names.
        """
        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        repo_wrapper = self.ghw.get_repo(repo)
        return [label.name for label in repo_wrapper.get_labels()]

    def loop(self):
        '''Run forever: call ``run``, count the iteration, then sleep for
        ``self.daemonize_interval`` seconds'''
        while True:
            self.run()
            self.ITERATION = self.ITERATION + 1
            pause = self.daemonize_interval
            minutes = pause / 60
            logging.info('sleep %ss (%sm)' % (pause, minutes))
            time.sleep(pause)

    @abc.abstractmethod
    def run(self):
        """Placeholder for one triage pass; this base version returns None."""
        return None

    def get_current_time(self):
        """Return the value of ``datetime.utcnow()``."""
        moment = datetime.utcnow()
        return moment

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' template with ``tvars`` as keywords.

        Args:
            tvars: mapping passed to the template as keyword arguments.
            boilerplate: template name without the '.j2' extension.

        Returns:
            The rendered text.
        """
        return environment.get_template('%s.j2' % boilerplate).render(**tvars)

    def apply_actions(self, iw, actions):

        action_meta = {'REDO': False}

        if actions.count() > 0:

            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = input("Take recommended actions (y/N/a/R/T/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont == 'T':
                    self.template_wizard(iw)
                    action_meta['REDO'] = True
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb; epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/T/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            if cont == 'T':
                self.template_wizard(iw)
                action_meta['REDO'] = True
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb; epdb.st()
                action_meta['REDO'] = True
        elif self.force_description_fixer:
            # FIXME: self.FIXED_ISSUES not defined since 1cf9674cd38edbd17aff906d72296c99043e5c13
            #        either define self.FIXED_ISSUES, either remove this method
            # FIXME force_description_fixer is not known by DefaultTriager (only
            #       by AnsibleTriage): if not removed, move it to AnsibleTriage
            if iw.html_url not in self.FIXED_ISSUES:
                if self.meta['template_missing_sections']:
                    changed = self.template_wizard(iw)
                    if changed:
                        action_meta['REDO'] = True
                self.FIXED_ISSUES.append(iw.html_url)
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def template_wizard(self, iw):
        """Show the current and the proposed description side by side and
        ask whether to apply the proposal.

        Returns:
            True when the user answered 'Y' and the description was set,
            False otherwise.
        """
        fixer = DescriptionFixer(iw, self.meta)

        left = iw.body.split('\n')
        right = fixer.new_description.split('\n')

        # pad the shorter column so both have the same number of rows
        rows = max(len(left), len(right))
        left.extend([''] * (rows - len(left)))
        right.extend([''] * (rows - len(right)))

        rule = '--------------------------------------------------------'
        width = 100
        print("%s|%s" % (rule.ljust(width), rule))
        for before, after in zip(left, right):
            # over-long cells are cut a little short of the column width
            if len(before) > width:
                before = before[:width-4]
            if len(after) > width:
                after = after[:width-4]
            print("%s|%s" % (before.rstrip().ljust(width), after.rstrip()))
        print("%s|%s" % (rule.rstrip().ljust(width), rule))

        print('# ' + iw.html_url)
        answer = input("Apply this new description? (Y/N) ")
        if answer != 'Y':
            return False
        iw.set_description(fixer.new_description)
        return True

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls

        Comments are posted first. When the issue is to be closed, it is
        closed and nothing else is applied; otherwise labels, assignments
        and the merge are applied in that order.

        Args:
            iw: the wrapped issue to act on.
            actions: object providing ``comments``, ``close``, ``unlabel``,
                ``newlabel``, ``assign``, ``unassign`` and ``merge``.
        """

        for comment in actions.comments:
            logging.info("action: comment - " + comment)
            iw.add_comment(comment=comment)
        if actions.close:
            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
            return

        for unlabel in actions.unlabel:
            logging.info('action: unlabel - ' + unlabel)
            iw.remove_label(label=unlabel)
        for newlabel in actions.newlabel:
            logging.info('action: label - ' + newlabel)
            iw.add_label(label=newlabel)

        for user in actions.assign:
            logging.info('action: assign - ' + user)
            iw.assign_user(user)

        for user in actions.unassign:
            logging.info('action: unassign - ' + user)
            iw.unassign_user(user)

        if actions.merge:
            iw.merge()

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Report whether pull request ``number`` has been merged.

        Args:
            number: pull request number; must not be None.
            repo: object exposing ``get_pullrequest(number)``.

        Returns:
            The pull request's ``merged`` value, or False when the pull
            request could not be fetched or its state could not be read.

        Raises:
            Exception: if ``number`` is None.
        '''
        if number is None:
            if C.DEFAULT_BREAKPOINTS:
                logging.error('breakpoint!')
                import epdb
                epdb.st()
            raise Exception('Can not check merge state on the number: None')

        try:
            pull = repo.get_pullrequest(number)
        except Exception as e:
            print(e)
            pull = None

        if not pull:
            return False

        try:
            return pull.merged
        except Exception as e:
            logging.debug(e)
            # optional interactive debugging, enabled through the constants
            if C.DEFAULT_BREAKPOINTS:
                logging.error('breakpoint!')
                import epdb
                epdb.st()
        return False

    def trigger_rate_limit(self):
        '''Loop forever over the repo's issues to burn through the rate limit.

        Reconnects, rebuilds the wrapper, then repeatedly lists the issues of
        ``self.repo`` and prints the rate limit after every step. This method
        never returns on its own.
        '''
        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            issue_cache = os.path.join(self.cachedir_base, self.repo)
            repo_wrapper = self.ghw.get_repo(self.repo, verbose=False)
            all_issues = repo_wrapper.repo.get_issues()
            pprint(repo_wrapper.get_rate_limit())

            for gh_issue in all_issues:
                wrapped = IssueWrapper(
                    github=self.ghw,
                    repo=repo_wrapper,
                    issue=gh_issue,
                    cachedir=issue_cache
                )
                # bare attribute access; presumably this makes the wrapper
                # hit the API -- TODO confirm against IssueWrapper.history
                wrapped.history
                pprint(repo_wrapper.get_rate_limit())

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging.

        The data is written to /tmp/actions/<repo_full_name>/<number>.json;
        missing directories are created.

        Args:
            issue: object providing ``repo_full_name`` and ``number``.
            actions: JSON-serializable description of the actions.
        '''
        fn = os.path.join(u'/tmp', u'actions', issue.repo_full_name, to_text(issue.number) + u'.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info('dumping {}'.format(fn))
        # text mode: json.dumps returns text, which a binary handle
        # rejects with TypeError on Python 3
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
예제 #7
0
class DefaultTriager:
    """
    Base class for triage bots.

    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement the 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done by updating the 'actions' instance
        3. call the parent 'apply_actions' method: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    # Labels that execute_actions() still adds while closing an issue; empty
    # here, presumably overridden by subclasses -- TODO confirm.
    CLOSING_LABELS = []

    def __init__(self, args=None):
        """Parse the command line and set up logging and the API clients.

        Args:
            args: argument list handed to the parser built by
                create_parser(); None lets argparse read sys.argv.
        """
        self.args = self.create_parser().parse_args(args)

        logging.info('starting bot')
        self.set_logger()

        self._maintainer_team = None
        self.issue_summaries = {}
        self.repos = {}
        self.cachedir_base = os.path.expanduser(self.args.cachedir_base)

        # --resume is expressed through --start-at: continue with the
        # number next to the recorded one, following the sort direction
        last_seen = self.get_resume()
        if last_seen:
            step = -1 if self.args.sort == 'desc' else 1
            self.args.start_at = last_seen['number'] + step

        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(
            url=C.DEFAULT_GITHUB_URL,
            user=C.DEFAULT_GITHUB_USERNAME,
            passw=C.DEFAULT_GITHUB_PASSWORD,
            token=C.DEFAULT_GITHUB_TOKEN,
            cachedir=self.cachedir_base,
        )

        logging.info('creating graphql client')
        self.gqlc = GithubGraphQLClient(
            C.DEFAULT_GITHUB_TOKEN,
            server=C.DEFAULT_GITHUB_URL,
        )

    @property
    def maintainer_team(self):
        """Sorted members of all configured maintainer teams, bots removed.

        The member list is fetched once (one get_members() call per
        'org/team' entry of C.DEFAULT_GITHUB_MAINTAINERS) and kept on the
        instance; the bot names are filtered out on every access.
        """
        # Note: this assumes that the token used by the bot has access to check
        # team privileges across potentially more than one organization
        if self._maintainer_team is None:
            members = self._maintainer_team = []
            for team_path in C.DEFAULT_GITHUB_MAINTAINERS:
                org_name, team_name = team_path.split('/')
                members.extend(self.ghw.get_members(org_name, team_name))

        without_bots = set(self._maintainer_team).difference(
            C.DEFAULT_BOT_NAMES)
        return sorted(without_bots)

    @classmethod
    def create_parser(cls):
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--ignore_state",
                            action="store_true",
                            help="Do not skip processing closed issues")
        parser.add_argument("--last",
                            type=int,
                            help="triage the last N issues or PRs")
        parser.add_argument("--only_closed",
                            action="store_true",
                            help="Triage closed issues|prs only")
        parser.add_argument("--only_issues",
                            action="store_true",
                            help="Triage issues only")
        parser.add_argument("--only_prs",
                            action="store_true",
                            help="Triage pullrequests only")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument(
            "--pr",
            "--id",
            type=str,
            help="Triage only the specified pr|issue (separated by commas)")
        parser.add_argument(
            "--resume",
            action="store_true",
            dest="resume_enabled",
            help="pickup right after where the bot last stopped")
        parser.add_argument("--repo",
                            "-r",
                            type=str,
                            help="Github repo to triage (defaults to all)")
        parser.add_argument("--skiprepo",
                            action='append',
                            help="Github repo to skip triaging")
        parser.add_argument("--start-at",
                            type=int,
                            help="Start triage at the specified pr|issue")
        parser.add_argument("--sort",
                            default='desc',
                            choices=['asc', 'desc'],
                            help="Direction to sort issues [desc=9-0 asc=0-9]")
        return parser

    def set_logger(self):
        """Configure logging from the --debug and --logfile options."""
        options = self.args
        set_logger(debug=options.debug, logfile=options.logfile)

    def start(self):
        if self.args.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    def loop(self):
        """Call the run method in a defined interval"""
        while True:
            self.run()
            interval = self.args.daemonize_interval
            logging.info('sleep %ss (%sm)' % (interval, interval / 60))
            time.sleep(interval)

    @abc.abstractmethod
    def run(self):
        pass

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' template and return the text.

        Args:
            tvars: mapping passed to the template as keyword variables.
            boilerplate: template name without the '.j2' suffix.
        """
        template_name = '%s.j2' % boilerplate
        return environment.get_template(template_name).render(**tvars)

    def apply_actions(self, iw, actions):
        action_meta = {'REDO': False}

        if actions.count() > 0:
            if self.args.dump_actions:
                self.dump_action_dict(iw, actions.__dict__)

            if self.args.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.args.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = input("Take recommended actions (y/N/a/R/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.args.always_pause:
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""
        for commentid in actions.uncomment:
            iw.remove_comment_by_id(commentid)

        for comment in actions.comments:
            logging.info("acton: comment - " + comment)
            iw.add_comment(comment=comment)

        if actions.close:
            for newlabel in actions.newlabel:
                if newlabel in self.CLOSING_LABELS:
                    logging.info('action: label - ' + newlabel)
                    iw.add_label(label=newlabel)

            logging.info('action: close')
            iw.instance.edit(state='closed')

        else:
            for unlabel in actions.unlabel:
                logging.info('action: unlabel - ' + unlabel)
                iw.remove_label(label=unlabel)
            for newlabel in actions.newlabel:
                logging.info('action: label - ' + newlabel)
                iw.add_label(label=newlabel)

            if actions.merge:
                iw.merge()

    def dump_action_dict(self, issue, actions):
        """Serialize the action dict to disk for quick(er) debugging"""
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info(f'dumping {fn}')
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))

    def get_resume(self):
        '''Returns a dict with the last issue repo+number processed'''
        if self.args.pr or not self.args.resume_enabled:
            return

        resume_file = os.path.join(self.cachedir_base, 'resume.json')
        if not os.path.isfile(resume_file):
            logging.error('Resume: %r not found', resume_file)
            return None

        logging.debug('Resume: read %r', resume_file)
        with open(resume_file, 'r', encoding='utf-8') as f:
            data = json.loads(f.read())
        return data

    def set_resume(self, repo, number):
        if self.args.pr or not self.args.resume_enabled:
            return

        data = {'repo': repo, 'number': number}
        resume_file = os.path.join(self.cachedir_base, 'resume.json')
        with open(resume_file, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    def eval_pr_param(self, pr):
        '''PR/ID can be a number, numberlist, script, jsonfile, or url'''

        if isinstance(pr, list):
            pass

        elif pr.isdigit():
            pr = int(pr)

        elif pr.startswith('http'):
            rr = requests.get(pr)
            numbers = rr.json()
            pr = numbers[:]

        elif os.path.isfile(pr) and not os.access(pr, os.X_OK):
            with open(pr) as f:
                numbers = json.loads(f.read())
            pr = numbers[:]

        elif os.path.isfile(pr) and os.access(pr, os.X_OK):
            # allow for scripts when trying to target spec issues
            logging.info('executing %s' % pr)
            (rc, so, se) = run_command(pr)
            numbers = json.loads(to_text(so))
            if numbers:
                if isinstance(numbers[0], dict) and 'number' in numbers[0]:
                    numbers = [x['number'] for x in numbers]
                else:
                    numbers = [int(x) for x in numbers]
            logging.info('%s numbers after running script' % len(numbers))
            pr = numbers[:]

        elif ',' in pr:
            numbers = [int(x) for x in pr.split(',')]
            pr = numbers[:]

        if not isinstance(pr, list):
            pr = [pr]

        return pr

    def update_issue_summaries(self, repopath=None, issuenums=None):
        if issuenums and len(issuenums) <= 10:
            self.issue_summaries[repopath] = {}

            for num in issuenums:
                # --pr is an alias to --id and can also be for issues
                node = self.gqlc.get_summary(repopath, 'pullRequest', num)
                if node is None:
                    node = self.gqlc.get_summary(repopath, 'issue', num)
                if node is not None:
                    self.issue_summaries[repopath][to_text(num)] = node
        else:
            self.issue_summaries[repopath] = self.gqlc.get_issue_summaries(
                repopath)

    def get_stale_numbers(self, reponame):
        stale = []
        for number, summary in self.issue_summaries[reponame].items():
            if number in stale:
                continue

            if summary['state'] == 'closed':
                continue

            number = int(number)
            mfile = os.path.join(self.cachedir_base, reponame, 'issues',
                                 to_text(number), 'meta.json')

            if not os.path.isfile(mfile):
                stale.append(number)
                continue

            try:
                with open(mfile, 'rb') as f:
                    meta = json.load(f)
            except ValueError as e:
                logging.error('failed to parse %s: %s' %
                              (to_text(mfile), to_text(e)))
                os.remove(mfile)
                stale.append(number)
                continue

            delta = (datetime.datetime.now() -
                     strip_time_safely(meta['time'])).days
            if delta > C.DEFAULT_STALE_WINDOW:
                stale.append(number)

        stale = sorted({int(x) for x in stale})
        if 10 >= len(stale) > 0:
            logging.info('stale: %s' % ','.join([to_text(x) for x in stale]))

        return stale

    @RateLimited
    def _collect_repo(self, repo, issuenums=None):
        '''Collect issues for an individual repo

        Creates or refreshes the state kept in self.repos[repo], updates
        the issue summaries, narrows the candidate numbers down according
        to the command line options and finally stores a
        RepoIssuesIterator over them in self.repos[repo]['issues'].

        Args:
            repo: repository path, used as key and in the GitHub URL.
            issuenums: optional list of numbers to restrict the run to.
        '''
        logging.info('getting repo obj for %s' % repo)
        if repo not in self.repos:
            # NOTE(review): 'ansible_commit' is not defined by
            # create_parser() of this class; presumably a subclass adds
            # the option -- confirm.
            gitrepo = GitRepoWrapper(
                cachedir=self.cachedir_base,
                repo=f'https://github.com/{repo}',
                commit=self.args.ansible_commit,
            )
            self.repos[repo] = {
                'repo': self.ghw.get_repo(repo),
                'issues': [],
                'processed': [],
                'since': None,
                'stale': [],
                'loopcount': 0,
                'labels': self.ghw.get_valid_labels(repo),
                'gitrepo': gitrepo,
            }
        else:
            # force a clean repo object to limit caching problems
            logging.info('updating repo')
            self.repos[repo]['repo'] = self.ghw.get_repo(repo)
            logging.info('updating checkout')
            self.repos[repo]['gitrepo'].update()

            # clear the issues
            self.repos[repo]['issues'] = {}
            # increment the loopcount
            self.repos[repo]['loopcount'] += 1

        logging.info('getting issue objs for %s' % repo)
        self.update_issue_summaries(repopath=repo, issuenums=issuenums)

        issuecache = {}
        numbers = self.issue_summaries[repo].keys()
        numbers = {int(x) for x in numbers}
        if issuenums:
            numbers.intersection_update(issuenums)
        # Always continue with a list: the stale check below extends
        # 'numbers' with '+=', which a set does not support.
        numbers = list(numbers)
        logging.info('%s known numbers' % len(numbers))

        if self.args.daemonize:

            if not self.repos[repo]['since']:
                # no marker yet: remember the newest timestamp seen in
                # the summaries
                ts = [
                    x[1]['updated_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['updated_at']
                ]
                ts += [
                    x[1]['created_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['created_at']
                ]
                ts = sorted(set(ts))
                if ts:
                    self.repos[repo]['since'] = ts[-1]
            else:
                # marker known: only look at what the API reports since
                # then, plus summaries created after the marker
                since = strip_time_safely(self.repos[repo]['since'])
                api_since = self.repos[repo]['repo'].get_issues(since=since)

                numbers = []
                for x in api_since:
                    numbers.append(x.number)
                    issuecache[x.number] = x

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [api] since == %s' %
                             (len(numbers), since))

                for k, v in self.issue_summaries[repo].items():
                    if v['created_at'] is None:
                        # issue is closed and was never processed
                        continue

                    if v['created_at'] > self.repos[repo]['since']:
                        numbers.append(k)

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [www] since == %s' %
                             (len(numbers), since))

        # start-at only applies to the first pass over the repo
        if self.args.start_at and self.repos[repo]['loopcount'] == 0:
            numbers = [x for x in numbers if x <= self.args.start_at]
            logging.info('%s numbers after start-at' % len(numbers))

        # Get stale numbers if not targeting
        if self.args.daemonize and self.repos[repo]['loopcount'] > 0:
            logging.info('checking for stale numbers')
            stale = self.get_stale_numbers(repo)
            self.repos[repo]['stale'] = [int(x) for x in stale]
            numbers += [int(x) for x in stale]
            numbers = sorted(set(numbers))
            logging.info('%s numbers after stale check' % len(numbers))

        ################################################################
        # PRE-FILTERING TO PREVENT EXCESSIVE API CALLS
        ################################################################

        # filter just the open numbers
        if not self.args.only_closed and not self.args.ignore_state:
            numbers = [
                x for x in numbers
                if (to_text(x) in self.issue_summaries[repo] and
                    self.issue_summaries[repo][to_text(x)]['state'] == 'open')
            ]
            logging.info('%s numbers after checking state' % len(numbers))

        # filter by type
        if self.args.only_issues:
            numbers = [
                x for x in numbers
                if self.issue_summaries[repo][to_text(x)]['type'] == 'issue'
            ]
            logging.info('%s numbers after checking type' % len(numbers))
        elif self.args.only_prs:
            numbers = [
                x for x in numbers if self.issue_summaries[repo][to_text(x)]
                ['type'] == 'pullrequest'
            ]
            logging.info('%s numbers after checking type' % len(numbers))

        numbers = sorted({int(x) for x in numbers})
        if self.args.sort == 'desc':
            numbers = [x for x in reversed(numbers)]

        # --last keeps the final N entries of the ordered list
        if self.args.last and len(numbers) > self.args.last:
            numbers = numbers[0 - self.args.last:]

        # Use iterator to avoid requesting all issues upfront
        self.repos[repo]['issues'] = RepoIssuesIterator(
            self.repos[repo]['repo'], numbers, issuecache=issuecache)

        logging.info('getting repo objs for %s complete' % repo)

    def collect_repos(self):
        """Run _collect_repo() for every configured repo that is selected.

        Repos are taken from C.DEFAULT_GITHUB_REPOS; --repo restricts the
        run to a single one and --skiprepo excludes repos. With --pr the
        value is expanded through eval_pr_param() for each repo.
        """
        logging.info('start collecting repos')
        for repo in C.DEFAULT_GITHUB_REPOS:
            # skip repos based on args
            only_repo = self.args.repo
            if only_repo and only_repo != repo:
                continue
            skipped = self.args.skiprepo
            if skipped and repo in skipped:
                continue

            if not self.args.pr:
                self._collect_repo(repo)
                continue

            wanted = self.eval_pr_param(self.args.pr)
            self._collect_repo(repo, issuenums=wanted)
        logging.info('finished collecting issues')