def __init__(self):

        parser = self.create_parser()
        args = parser.parse_args()

        for x in vars(args):
            val = getattr(args, x)
            setattr(self, x, val)

        self.last_run = None

        self.github_user = C.DEFAULT_GITHUB_USERNAME
        self.github_pass = C.DEFAULT_GITHUB_PASSWORD
        self.github_token = C.DEFAULT_GITHUB_TOKEN

        # where to store junk
        self.cachedir_base = os.path.expanduser(self.cachedir_base)

        self.set_logger()
        logging.info('starting bot')

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base)
    def __init__(self, args=None):
        """Parse *args* (argv-style list), set up logging and the API clients."""
        self.args = self.create_parser().parse_args(args)

        logging.info('starting bot')
        self.set_logger()

        # Expand '~' so the cache directory is a usable filesystem path.
        self.cachedir_base = os.path.expanduser(self.args.cachedir_base)
        self.issue_summaries = {}
        self.repos = {}

        # "resume" overloads --start-at: continue just past the last issue
        # processed, moving in the direction implied by the sort order.
        resume = self.get_resume()
        if resume:
            step = -1 if self.args.sort == 'desc' else 1
            self.args.start_at = resume['number'] + step

        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(url=C.DEFAULT_GITHUB_URL,
                                 user=C.DEFAULT_GITHUB_USERNAME,
                                 passw=C.DEFAULT_GITHUB_PASSWORD,
                                 token=C.DEFAULT_GITHUB_TOKEN,
                                 cachedir=self.cachedir_base)

        logging.info('creating graphql client')
        self.gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN,
                                        server=C.DEFAULT_GITHUB_URL)

        self._maintainer_team = None
# Example #3 (score: 0)
    def __init__(self, args):
        """Initialize the triager from parsed CLI *args* and the config file.

        Reads optional GitHub credentials from the '[defaults]' section of
        the configured ini file, sets up logging, connects to the API and
        pre-fetches the repo's valid labels.
        """

        self.args = args
        self.last_run = None
        self.daemonize = None
        self.daemonize_interval = None
        self.dry_run = False
        self.force = False

        self.configfile = self.args.configfile
        self.config = ConfigParser.ConfigParser()
        self.config.read([self.configfile])

        def _config_get(option):
            # A missing section/option just means "not configured".  The
            # previous bare ``except:`` also swallowed KeyboardInterrupt and
            # real bugs; ConfigParser.Error covers exactly the lookup errors.
            try:
                return self.config.get('defaults', option)
            except ConfigParser.Error:
                return None

        self.github_user = _config_get('github_username')
        self.github_pass = _config_get('github_password')
        self.github_token = _config_get('github_token')

        self.repopath = self.args.repo
        self.logfile = self.args.logfile

        # where to store junk
        self.cachedir = os.path.expanduser(self.args.cachedir)
        self.cachedir_base = self.cachedir

        self.set_logger()
        logging.info('starting bot')

        # mirror every CLI option as an instance attribute
        logging.debug('setting bot attributes')
        for x in vars(self.args):
            setattr(self, x, getattr(self.args, x))

        if hasattr(self.args, 'pause') and self.args.pause:
            self.always_pause = True

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir)

        # get valid labels
        logging.info('getting labels')
        self.valid_labels = self.get_valid_labels(self.repopath)
def test_get_request_rate_limited():
    """get_request on a mocked connection must raise RateLimitError."""
    wrapper = GithubWrapper(GithubMock(),
                            token=12345,
                            cachedir=tempfile.mkdtemp())

    with pytest.raises(RateLimitError):
        wrapper.get_request('https://foo.bar.com/test')
# Example #5 (score: 0)
    def get_valid_labels(self, repo):
        """Return the label names defined on *repo*."""

        # lazily build the wrapper so results are cached+updatable
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        repo_wrapper = self.ghw.get_repo(repo)
        return [label.name for label in repo_wrapper.labels]
    def setUp(self):
        """Create a clean cache dir and an IssueWrapper over mocked objects."""
        cache = '/tmp/testcache'
        if os.path.isdir(cache):
            shutil.rmtree(cache)
        os.makedirs(cache)

        gh = GithubMock()
        wrapped_repo = GithubWrapper(gh, cachedir=cache).get_repo('test/test')
        # FIXME - this should return a wrapped issue
        wrapped_issue = wrapped_repo.get_issue(1)
        self.iw = IssueWrapper(github=gh,
                               repo=wrapped_repo,
                               issue=wrapped_issue,
                               cachedir=cache)
        self.iw.gitrepo = GitRepoWrapperMock()
    def setUp(self):
        """Create a clean cache dir and an IssueWrapper with a mock indexer."""
        cache = '/tmp/testcache'
        if os.path.isdir(cache):
            shutil.rmtree(cache)
        os.makedirs(cache)

        gh = GithubMock()
        wrapped_repo = GithubWrapper(gh, cachedir=cache).get_repo('test/test',
                                                                  verbose=False)
        # FIXME - this should return a wrapped issue
        wrapped_issue = wrapped_repo.get_issue(1)
        self.iw = IssueWrapper(github=gh,
                               repo=wrapped_repo,
                               issue=wrapped_issue,
                               cachedir=cache)
        self.iw.file_indexer = FileIndexerMock()
    def __init__(self):
        """Parse CLI options, set up logging and the GitHub API wrapper."""

        cli = self.create_parser().parse_args()

        # mirror every CLI option as an instance attribute
        for name in vars(cli):
            setattr(self, name, getattr(cli, name))

        self.last_run = None

        # endpoint and credentials come from the global configuration module
        self.github_url = C.DEFAULT_GITHUB_URL
        self.github_user = C.DEFAULT_GITHUB_USERNAME
        self.github_pass = C.DEFAULT_GITHUB_PASSWORD
        self.github_token = C.DEFAULT_GITHUB_TOKEN

        # expand '~' so the cache dir is a usable filesystem path
        self.cachedir_base = os.path.expanduser(self.cachedir_base)

        self.set_logger()
        logging.info('starting bot')

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base)
# Example #9 (score: 0)
    def get_valid_labels(self, repo=None):
        """Return label names for *repo*, or for the default repo path."""

        # lazily build the wrapper so results are cached+updatable
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        if repo:
            # v3 workflow
            rw = self.ghw.get_repo(repo)
        else:
            # OLD workflow: remember the wrapper on self.repo as well
            rw = self.ghw.get_repo(self._get_repo_path())
            self.repo = rw

        return [label.name for label in rw.get_labels()]
# Example #10 (score: 0)
    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit'''

        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        # loop forever; eventually the API quota runs out
        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            thisrepo = self.ghw.get_repo(self.repo, verbose=False)
            issues = thisrepo.repo.get_issues()
            pprint(thisrepo.get_rate_limit())

            for issue in issues:
                wrapped = IssueWrapper(github=self.ghw,
                                       repo=thisrepo,
                                       issue=issue,
                                       cachedir=cachedir)
                # accessing .history triggers additional API requests
                wrapped.history
                pprint(thisrepo.get_rate_limit())
# Example #11 (score: 0)
    def get_valid_labels(self, repo):
        """Return the label names defined on *repo*."""

        # lazily build the wrapper so results are cached+updatable
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        wrapped = self.ghw.get_repo(repo)
        return [label.name for label in wrapped.get_labels()]
# Example #12 (score: 0)
    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit'''

        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            repo_wrapper = self.ghw.get_repo(self.repo, verbose=False)
            issue_list = repo_wrapper.repo.get_issues()
            pprint(repo_wrapper.get_rate_limit())

            for issue in issue_list:
                iw = IssueWrapper(github=self.ghw,
                                  repo=repo_wrapper,
                                  issue=issue,
                                  cachedir=cachedir)
                # property access that performs additional API calls
                iw.history
                pprint(repo_wrapper.get_rate_limit())
# Example #13 (score: 0)
def main():
    """Find and delete duplicated ansibot comments on open ansible issues.

    Issue summaries are fetched once via GraphQL and cached on disk; for
    each open issue the bot's comments are grouped by identical body text
    and all but the oldest copy of each duplicate group are deleted.
    """

    # NOTE: the previous ``logging.level = logging.DEBUG`` assignment was a
    # no-op (the logging module has no such attribute); the root logger's
    # setLevel below is what actually controls verbosity.
    logFormatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    rootLogger = logging.getLogger()
    rootLogger.setLevel(logging.DEBUG)
    consoleHandler = logging.StreamHandler()
    consoleHandler.setFormatter(logFormatter)
    rootLogger.addHandler(consoleHandler)

    gq_cache_file = '/tmp/gql_cache.json'

    # fetch the issue summaries once and cache them on disk
    if not os.path.exists(gq_cache_file):
        gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
        summaries = gqlc.get_issue_summaries('ansible/ansible')
        with open(gq_cache_file, 'w') as f:
            json.dump(summaries, f)
    else:
        with open(gq_cache_file, 'r') as f:
            summaries = json.load(f)

    # only open issues are candidates for cleanup
    numbers = {v['number'] for v in summaries.values() if v['state'] == 'open'}
    numbers = sorted(numbers, reverse=True)

    gh = GithubWrapper(None, token=C.DEFAULT_GITHUB_TOKEN)

    for idn, number in enumerate(numbers):
        logging.info('%s|%s issue %s', len(numbers), idn + 1, number)

        # only process issues at or below this number — presumably the newer
        # range was already swept; TODO confirm the cutoff is still wanted
        if number > 52979:
            continue

        comments_url = 'https://api.github.com/repos/ansible/ansible/issues/%s/comments' % number
        comments = gh.get_request(comments_url)

        # group the bot's comment ids by identical body text
        duplicates = {}
        for comment in comments:
            if comment['user']['login'] != 'ansibot':
                continue
            duplicates.setdefault(comment['body'], []).append(comment['id'])

        # keep only bodies that actually occur more than once
        duplicates = {k: v for k, v in duplicates.items() if len(v) > 1}

        for ids in duplicates.values():
            dupes = sorted((x for x in comments if x['id'] in ids),
                           key=lambda x: x['created_at'])

            pprint([[x['id'], x['body']] for x in dupes])

            # keep the oldest comment, delete the rest
            for dupe in dupes[1:]:
                gh.delete_request(dupe['url'])
            time.sleep(1)
class DefaultTriager(object):
    """Base triage bot for GitHub issues and pull requests.

    Parses CLI options, connects to the GitHub API and applies recommended
    actions (comments, labels, assignments, merges) to issues, either
    interactively or unattended.  Subclasses provide the actual triage
    logic by implementing ``run()``.
    """

    # number of completed daemonize-loop iterations
    ITERATION = 0

    def __init__(self):
        """Parse CLI args, configure logging and open the GitHub connection."""

        parser = self.create_parser()
        args = parser.parse_args()

        # mirror every parsed CLI option as an instance attribute
        for x in vars(args):
            val = getattr(args, x)
            setattr(self, x, val)

        self.last_run = None

        self.github_user = C.DEFAULT_GITHUB_USERNAME
        self.github_pass = C.DEFAULT_GITHUB_PASSWORD
        self.github_token = C.DEFAULT_GITHUB_TOKEN

        # where to store junk
        self.cachedir_base = os.path.expanduser(self.cachedir_base)

        self.set_logger()
        logging.info('starting bot')

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base)

    @classmethod
    def create_parser(cls):
        """Creates an argument parser

        Returns:
            A argparse.ArgumentParser object
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--verbose",
                            "-v",
                            action="store_true",
                            help="Verbose output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument("--force_rate_limit",
                            action="store_true",
                            help="debug: force the rate limit")
        parser.add_argument("--force_description_fixer",
                            action="store_true",
                            help="Always invoke the description fixer")
        # useful for debugging
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        return parser

    def set_logger(self):
        """Attach file + console handlers to the root logger.

        Verbosity is controlled through the root logger's level based on
        the --debug flag.  The former ``logging.level = ...`` assignments
        were removed: the logging module has no such configuration
        attribute, so they were silent no-ops.
        """
        logFormatter = \
            logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        rootLogger = logging.getLogger()
        if self.debug:
            rootLogger.setLevel(logging.DEBUG)
        else:
            rootLogger.setLevel(logging.INFO)

        logdir = os.path.dirname(self.logfile)
        if logdir and not os.path.isdir(logdir):
            os.makedirs(logdir)

        fileHandler = WatchedFileHandler(self.logfile)
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)

    def start(self):
        """Entry point: run once, loop forever, or force the rate limit."""

        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if self.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API"""
        # a token takes precedence over user/password credentials
        if self.github_token:
            return Github(login_or_token=self.github_token)
        else:
            return Github(login_or_token=self.github_user,
                          password=self.github_pass)

    def is_pr(self, issue):
        """Return True if *issue* is actually a pull request."""
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        """Return True if *issue* is a plain issue (not a pull request)."""
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            members = mdata[1]
            # the org changed since the cache was written -> refetch
            if mdata[0] < gh_org.updated_at:
                update = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]
            # refresh the cache whenever the list was refetched; previously
            # only a missing cachefile was written, so a stale cache was
            # never updated and members were refetched on every run
            write_cache = True

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle.dump(mdata, f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Get members of the core team

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A list of GitHub login belonging to teams
        """
        members = set()

        conn = self._connect()
        gh_org = conn.get_organization(organization)
        for team in gh_org.get_teams():
            if team.name in teams:
                for member in team.get_members():
                    members.add(member.login)

        return sorted(members)

    #@RateLimited
    def get_valid_labels(self, repo):
        """Return the label names defined on *repo*."""

        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        rw = self.ghw.get_repo(repo)
        vlabels = []
        for vl in rw.get_labels():
            vlabels.append(vl.name)

        return vlabels

    def loop(self):
        '''Call the run method in a defined interval'''
        while True:
            self.run()
            self.ITERATION += 1
            interval = self.daemonize_interval
            logging.info('sleep %ss (%sm)' % (interval, interval / 60))
            time.sleep(interval)

    @abc.abstractmethod
    def run(self):
        """Perform a single triage pass; implemented by subclasses."""
        pass

    def get_current_time(self):
        """Return the current UTC time as a naive datetime."""
        return datetime.utcnow()

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the named jinja2 boilerplate template with *tvars*."""
        template = environment.get_template('%s.j2' % boilerplate)
        comment = template.render(**tvars)
        return comment

    def apply_actions(self, iw, actions):
        """Execute (or interactively confirm) *actions* on issue wrapper *iw*.

        Returns a meta dict whose 'REDO' flag tells the caller to
        re-process the same issue.
        """

        action_meta = {'REDO': False}

        if actions.count() > 0:

            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = raw_input(
                    "Take recommended actions (y/N/a/R/T/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont == 'T':
                    self.template_wizard(iw)
                    action_meta['REDO'] = True
                if cont == 'r' or cont == 'R':
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = raw_input("Continue (Y/n/a/R/T/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            if cont == 'T':
                self.template_wizard(iw)
                action_meta['REDO'] = True
            elif cont == 'REDO':
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        elif self.force_description_fixer:
            # FIXME: self.FIXED_ISSUES not defined since 1cf9674cd38edbd17aff906d72296c99043e5c13
            #        either define self.FIXED_ISSUES, either remove this method
            # FIXME force_description_fixer is not known by DefaultTriager (only
            #       by AnsibleTriage): if not removed, move it to AnsibleTriage
            if iw.html_url not in self.FIXED_ISSUES:
                if self.meta['template_missing_sections']:
                    changed = self.template_wizard(iw)
                    if changed:
                        action_meta['REDO'] = True
                self.FIXED_ISSUES.append(iw.html_url)
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def template_wizard(self, iw):
        """Show an old/new description diff and apply the fix if confirmed.

        Returns True when the issue description was updated.
        """

        DF = DescriptionFixer(iw, self.meta)

        old = iw.body
        old_lines = old.split('\n')
        new = DF.new_description
        new_lines = new.split('\n')

        # pad the shorter column so old/new can be zipped line by line
        total_lines = len(new_lines)
        if len(old_lines) > total_lines:
            total_lines = len(old_lines)

        if len(new_lines) < total_lines:
            delta = total_lines - len(new_lines)
            for x in xrange(0, delta):
                new_lines.append('')

        if len(old_lines) < total_lines:
            delta = total_lines - len(old_lines)
            for x in xrange(0, delta):
                old_lines.append('')

        line = '--------------------------------------------------------'
        padding = 100
        print("%s|%s" % (line.ljust(padding), line))
        for c1, c2 in zip(old_lines, new_lines):
            if len(c1) > padding:
                c1 = c1[:padding - 4]
            if len(c2) > padding:
                c2 = c2[:padding - 4]
            print("%s|%s" % (c1.rstrip().ljust(padding), c2.rstrip()))
        print("%s|%s" % (line.rstrip().ljust(padding), line))

        print('# ' + iw.html_url)
        cont = raw_input("Apply this new description? (Y/N) ")
        if cont == 'Y':
            iw.set_description(DF.new_description)
            return True
        else:
            return False

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""

        for comment in actions.comments:
            # fixed typo: was "acton: comment"
            logging.info("action: comment - " + comment)
            iw.add_comment(comment=comment)
        if actions.close:
            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
            return

        for unlabel in actions.unlabel:
            logging.info('action: unlabel - ' + unlabel)
            iw.remove_label(label=unlabel)
        for newlabel in actions.newlabel:
            logging.info('action: label - ' + newlabel)
            iw.add_label(label=newlabel)

        for user in actions.assign:
            logging.info('action: assign - ' + user)
            iw.assign_user(user)

        for user in actions.unassign:
            logging.info('action: unassign - ' + user)
            iw.unassign_user(user)

        if actions.merge:
            iw.merge()

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Check if a PR# has been merged or not'''

        if number is None:
            # deliberate live-debug breakpoint before failing hard
            import epdb
            epdb.st()
            raise Exception('Can not check merge state on the number: None')

        merged = False
        pr = None
        try:
            pr = repo.get_pullrequest(number)
        except Exception as e:
            print(e)
        if pr:
            try:
                merged = pr.merged
            except Exception as e:
                logging.debug(e)
                # deliberate live-debug breakpoint
                import epdb
                epdb.st()
        return merged

    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit'''

        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            thisrepo = self.ghw.get_repo(self.repo, verbose=False)
            issues = thisrepo.repo.get_issues()
            rl = thisrepo.get_rate_limit()
            pprint(rl)

            for issue in issues:
                iw = IssueWrapper(github=self.ghw,
                                  repo=thisrepo,
                                  issue=issue,
                                  cachedir=cachedir)
                # property access that performs additional API calls
                iw.history
                rl = thisrepo.get_rate_limit()
                pprint(rl)

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging'''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info('dumping {}'.format(fn))
        # json.dumps returns text, so open in text mode; the previous 'wb'
        # raised TypeError on python3
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
# Example #15 (score: 0)
def test_get_request_rate_limited():
    """get_request on a token-only wrapper must raise RateLimitError."""
    # stub out the real network connection
    GithubWrapper._connect = lambda *args: None
    wrapper = GithubWrapper(token=12345, cachedir=tempfile.mkdtemp())

    with pytest.raises(RateLimitError):
        wrapper.get_request('https://foo.bar.com/test')
# Example #16 (score: 0)
    def index_ecosystem(self):
        """Map collection FQCNs to git repo wrappers for the whole ecosystem.

        Populates ``self._gitrepos`` from the ansible-collections GitHub org
        and the galaxy API, then builds ``self.GALAXY_FQCNS`` and
        ``self.GALAXY_FILES`` (file -> set of owning FQCNs).
        """
        # index the ansible-collections org
        token = C.DEFAULT_GITHUB_TOKEN
        gh = Github(login_or_token=token)
        gw = GithubWrapper(gh, cachedir=self.cachedir)
        ac = gw.get_org('ansible-collections')

        cloneurls = set()
        for repo in ac.get_repos():
            cloneurls.add(repo.clone_url)
        cloneurls = [x.replace('.git', '') for x in cloneurls]

        for curl in cloneurls:
            # skip the non-collection utility repos in the org
            if curl.endswith('/overview'):
                continue
            if curl.endswith('/collection_template'):
                continue
            if curl.endswith('/.github'):
                continue
            if curl.endswith('/hub'):
                continue
            grepo = GitRepoWrapper(cachedir=self.cachedir,
                                   repo=curl,
                                   rebase=False)

            # is there a galaxy.yml at the root level?
            if grepo.exists('galaxy.yml'):
                # safe_load: galaxy.yml is remote input; plain yaml.load
                # without a Loader is deprecated and unsafe
                meta = yaml.safe_load(grepo.get_file_content('galaxy.yml'))
                fqcn = '%s.%s' % (meta['namespace'], meta['name'])
                self._gitrepos[fqcn] = grepo
            else:
                # multi-collection repos ... sigh.
                galaxyfns = grepo.find('galaxy.yml')

                if galaxyfns:
                    for gfn in galaxyfns:
                        meta = yaml.safe_load(grepo.get_file_content(gfn))
                        fqcn = '%s.%s' % (meta['namespace'], meta['name'])
                        _grepo = GitRepoWrapper(cachedir=self.cachedir,
                                                repo=curl,
                                                rebase=False,
                                                context=os.path.dirname(gfn))
                        self._gitrepos[fqcn] = _grepo
                else:

                    fqcn = None
                    bn = os.path.basename(curl)

                    # enumerate the url?
                    if '.' in bn:
                        fqcn = bn

                    # try the README?
                    if fqcn is None:
                        for fn in ['README.rst', 'README.md']:
                            if fqcn:
                                break
                            if not grepo.exists(fn):
                                continue
                            fdata = grepo.get_file_content(fn)
                            if '.' not in fdata:
                                continue
                            lines = fdata.split('\n')
                            for line in lines:
                                line = line.strip()
                                if line.lower().startswith(
                                        'ansible collection:'):
                                    fqcn = line.split(':')[-1].strip()
                                    break

                    # lame ... fall back to a synthetic namespace
                    if fqcn is None:
                        fqcn = bn + '._community'

                    self._gitrepos[fqcn] = grepo

        # scrape the galaxy collections api
        nexturl = self._baseurl + '/api/v2/collections/?page_size=1000'
        while nexturl:
            jdata = self._get_cached_url(nexturl)
            nexturl = jdata.get('next_link')
            if nexturl:
                nexturl = self._baseurl + nexturl

            for res in jdata.get('results', []):
                fqcn = '%s.%s' % (res['namespace']['name'], res['name'])
                if res.get('deprecated'):
                    continue
                # github clones win over galaxy-only entries
                if fqcn in self._gitrepos:
                    continue
                lv = res['latest_version']['href']
                lvdata = self._get_cached_url(lv)
                rurl = lvdata.get('metadata', {}).get('repository')
                if rurl is None:
                    rurl = lvdata['download_url']
                grepo = GitRepoWrapper(cachedir=self.cachedir,
                                       repo=rurl,
                                       rebase=False)
                self._gitrepos[fqcn] = grepo

        # reconcile all things ...
        self.GALAXY_FQCNS = sorted(set(self._gitrepos.keys()))
        self.GALAXY_FILES = {}
        for fqcn, gr in self._gitrepos.items():
            if fqcn.startswith('testing.'):
                continue
            for fn in gr.files:
                self.GALAXY_FILES.setdefault(fn, set()).add(fqcn)
# Example #17 (score: 0)
class DefaultTriager(object):
    """Legacy (Python 2 era) base class for triager bots.

    Reads GitHub credentials from an INI config file, connects to the
    API, and provides the shared plumbing: resume checkpoints, logging
    setup, label fetching, and interactive application of actions.
    Concrete triagers implement run().
    """

    # count of completed passes when running in daemonize mode (see loop())
    ITERATION = 0

    def __init__(self, args):
        """Store CLI args, read config credentials and build API clients."""
        self.args = args
        self.last_run = None
        self.daemonize = None
        self.daemonize_interval = None
        self.dry_run = False
        self.force = False

        # credentials and defaults come from an INI style config file
        self.configfile = self.args.configfile
        self.config = ConfigParser.ConfigParser()
        self.config.read([self.configfile])

        # NOTE(review): the bare excepts below treat any failure (missing
        # section/option, unreadable file) as "credential not configured"
        try:
            self.github_user = self.config.get('defaults', 'github_username')
        except:
            self.github_user = None

        try:
            self.github_pass = self.config.get('defaults', 'github_password')
        except:
            self.github_pass = None

        try:
            self.github_token = self.config.get('defaults', 'github_token')
        except:
            self.github_token = None

        self.repopath = self.args.repo
        self.logfile = self.args.logfile

        # where to store junk
        self.cachedir = self.args.cachedir
        self.cachedir = os.path.expanduser(self.cachedir)
        self.cachedir_base = self.cachedir

        self.set_logger()
        logging.info('starting bot')

        # mirror every CLI option onto the instance; this may overwrite
        # the defaults assigned above (dry_run, force, ...)
        logging.debug('setting bot attributes')
        for x in vars(self.args):
            val = getattr(self.args, x)
            setattr(self, x, val)

        if hasattr(self.args, 'pause') and self.args.pause:
            self.always_pause = True

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir)

        # get valid labels
        logging.info('getting labels')
        self.valid_labels = self.get_valid_labels(self.repopath)

    @property
    def resume(self):
        '''Returns a dict with the last issue repo+number processed'''
        # resume only applies to full scans, not explicit --pr runs
        if not hasattr(self, 'args'):
            return None
        if hasattr(self.args, 'pr') and self.args.pr:
            return None
        if not hasattr(self.args, 'resume'):
            return None
        if not self.args.resume:
            return None

        if hasattr(self, 'cachedir_base'):
            resume_file = os.path.join(self.cachedir_base, 'resume.json')
        else:
            resume_file = os.path.join(self.cachedir, 'resume.json')
        if not os.path.isfile(resume_file):
            return None

        # binary read + json.loads is the py2-compatible idiom
        with open(resume_file, 'rb') as f:
            data = json.loads(f.read())
        return data

    def set_resume(self, repo, number):
        """Persist the last processed repo+number so a later run can resume."""
        if not hasattr(self, 'args'):
            return None
        if hasattr(self.args, 'pr') and self.args.pr:
            return None
        if not hasattr(self.args, 'resume'):
            return None
        if not self.args.resume:
            return None

        data = {'repo': repo, 'number': number}
        if hasattr(self, 'cachedir_base'):
            resume_file = os.path.join(self.cachedir_base, 'resume.json')
        else:
            resume_file = os.path.join(self.cachedir, 'resume.json')
        with open(resume_file, 'wb') as f:
            f.write(json.dumps(data, indent=2))

    def set_logger(self):
        """Configure the root logger with a file handler and a console handler."""
        # NOTE(review): assigning logging.level just sets a module attribute
        # and configures nothing; the effective level is rootLogger.setLevel
        if hasattr(self.args, 'debug') and self.args.debug:
            logging.level = logging.DEBUG
        else:
            logging.level = logging.INFO
        logFormatter = \
            logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        rootLogger = logging.getLogger()
        if hasattr(self.args, 'debug') and self.args.debug:
            rootLogger.setLevel(logging.DEBUG)
        else:
            rootLogger.setLevel(logging.INFO)

        if hasattr(self.args, 'logfile'):
            logfile = self.args.logfile
        else:
            logfile = '/tmp/ansibullbot.log'

        logdir = os.path.dirname(logfile)
        if logdir and not os.path.isdir(logdir):
            os.makedirs(logdir)

        # NOTE(review): handlers are appended unconditionally, so calling
        # this method twice would duplicate every log line
        fileHandler = logging.FileHandler(logfile)
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)

    def start(self):
        """Entry point: trigger rate limit, loop forever, or run once."""
        if hasattr(self.args, 'force_rate_limit') and \
                self.args.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if hasattr(self.args, 'daemonize') and self.args.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API"""
        # a token takes precedence over user/password credentials
        if self.github_token:
            return Github(login_or_token=self.github_token)
        else:
            return Github(login_or_token=self.github_user,
                          password=self.github_pass)

    @abc.abstractmethod
    def _get_repo_path(self):
        """Return the default repo path; implemented by subclasses."""
        pass

    def is_pr(self, issue):
        # a PR's html url contains '/pull/'; plain issues do not
        if '/pull/' in issue.html_url:
            return True
        else:
            return False

    def is_issue(self, issue):
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = self.cachedir
        if cachedir.endswith('/issues'):
            cachedir = os.path.dirname(cachedir)
        cachefile = os.path.join(cachedir, 'members.pickle')

        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            members = mdata[1]
            # cached data older than the org's last update -> refresh
            if mdata[0] < gh_org.updated_at:
                update = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]

        # save the data
        # NOTE(review): write_cache is only set when the cache file was
        # missing, so a stale-cache refresh is never written back — confirm
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle.dump(mdata, f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Get members of the core team

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A list of GitHub login belonging to teams
        """
        members = set()

        conn = self._connect()
        gh_org = conn.get_organization(organization)
        for team in gh_org.get_teams():
            if team.name in teams:
                for member in team.get_members():
                    members.add(member.login)

        return sorted(members)

    #@RateLimited
    def get_valid_labels(self, repo=None):
        """Return the label names defined on the given (or default) repo."""
        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        if not repo:
            # OLD workflow
            self.repo = self.ghw.get_repo(self._get_repo_path())
            vlabels = []
            for vl in self.repo.get_labels():
                vlabels.append(vl.name)
        else:
            # v3 workflow
            rw = self.ghw.get_repo(repo)
            vlabels = []
            for vl in rw.get_labels():
                vlabels.append(vl.name)

        return vlabels

    def debug(self, msg=""):
        """Prints debug message if verbosity is given"""
        if self.verbose:
            print("Debug: " + msg)

    def loop(self):
        '''Call the run method in a defined interval'''
        while True:
            self.run()
            # NOTE(review): this rebinds ITERATION as an instance attribute,
            # shadowing the class-level counter
            self.ITERATION += 1
            interval = self.args.daemonize_interval
            logging.info('sleep %ss (%sm)' % (interval, interval / 60))
            time.sleep(interval)

    @abc.abstractmethod
    def run(self):
        """Process issues; implemented by concrete triagers."""
        pass

    def get_current_time(self):
        """Return the current UTC timestamp (naive datetime)."""
        return datetime.utcnow()

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' jinja2 template with tvars."""
        template = environment.get_template('%s.j2' % boilerplate)
        comment = template.render(**tvars)
        return comment

    def check_safe_match(self, iw, actions):
        """ Turn force on or off depending on match characteristics """
        safe_match = False

        # no actions at all is always safe
        if actions.count() == 0:
            safe_match = True

        elif not actions.close and not actions.unlabel:
            # a single affects_* label addition is considered safe
            if len(actions.newlabel) == 1:
                if actions.newlabel[0].startswith('affects_'):
                    safe_match = True

        else:
            safe_match = False
            if self.module:
                if self.module in iw.instance.title.lower():
                    safe_match = True

        # be more lenient on re-notifications
        if not safe_match:
            if not actions.close and \
                    not actions.unlabel and \
                    not actions.newlabel:

                if len(actions.comments) == 1:
                    if 'still waiting' in actions.comments[0]:
                        safe_match = True

        if safe_match:
            self.force = True
        else:
            self.force = False

    def apply_actions(self, iw, actions):
        """Apply (or interactively confirm) actions; returns {'REDO': bool}."""
        action_meta = {'REDO': False}

        if hasattr(self, 'safe_force') and self.safe_force:
            self.check_safe_match(iw, actions)

        if actions.count() > 0:

            if hasattr(self, 'args'):
                if hasattr(self.args, 'dump_actions'):
                    if self.args.dump_actions:
                        self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                # interactive: yes / no / abort / redo / template / debug
                cont = raw_input(
                    "Take recommended actions (y/N/a/R/T/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont == 'T':
                    self.template_wizard(iw)
                    action_meta['REDO'] = True
                if cont == 'r' or cont == 'R':
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = raw_input("Continue (Y/n/a/R/T/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            if cont == 'T':
                self.template_wizard(iw)
                action_meta['REDO'] = True
            elif cont == 'REDO':
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        elif hasattr(self, 'force_description_fixer'
                     ) and self.args.force_description_fixer:
            # try to repair missing template sections once per issue
            if iw.html_url not in self.FIXED_ISSUES:
                if self.meta['template_missing_sections']:
                    changed = self.template_wizard(iw)
                    if changed:
                        action_meta['REDO'] = True
                self.FIXED_ISSUES.append(iw.html_url)
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def template_wizard(self, iw):
        """Show old/new description side by side and interactively apply
        the rewritten description; returns True when it was applied."""
        DF = DescriptionFixer(iw, self.meta)

        old = iw.body
        old_lines = old.split('\n')
        new = DF.new_description
        new_lines = new.split('\n')

        # pad the shorter column so both sides have the same line count
        total_lines = len(new_lines)
        if len(old_lines) > total_lines:
            total_lines = len(old_lines)

        if len(new_lines) < total_lines:
            delta = total_lines - len(new_lines)
            for x in xrange(0, delta):
                new_lines.append('')

        if len(old_lines) < total_lines:
            delta = total_lines - len(old_lines)
            for x in xrange(0, delta):
                old_lines.append('')

        line = '--------------------------------------------------------'
        padding = 100
        print("%s|%s" % (line.ljust(padding), line))
        for c1, c2 in zip(old_lines, new_lines):
            if len(c1) > padding:
                c1 = c1[:padding - 4]
            if len(c2) > padding:
                c2 = c2[:padding - 4]
            print("%s|%s" % (c1.rstrip().ljust(padding), c2.rstrip()))
        print("%s|%s" % (line.rstrip().ljust(padding), line))

        print('# ' + iw.html_url)
        cont = raw_input("Apply this new description? (Y/N) ")
        if cont == 'Y':
            iw.set_description(DF.new_description)
            return True
        else:
            return False

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""

        for comment in actions.comments:
            # NOTE(review): 'acton' typo in this runtime log string
            logging.info("acton: comment - " + comment)
            iw.add_comment(comment=comment)
        if actions.close:
            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
            # closing short-circuits all label/assign/merge actions below
            return

        for unlabel in actions.unlabel:
            logging.info('action: unlabel - ' + unlabel)
            iw.remove_label(label=unlabel)
        for newlabel in actions.newlabel:
            logging.info('action: label - ' + newlabel)
            iw.add_label(label=newlabel)

        for user in actions.assign:
            logging.info('action: assign - ' + user)
            iw.assign_user(user)

        for user in actions.unassign:
            logging.info('action: unassign - ' + user)
            iw.unassign_user(user)

        if actions.merge:
            iw.merge()

    @RateLimited
    def is_pr_merged(self, number, repo=None):
        '''Check if a PR# has been merged or not'''
        merged = False
        pr = None
        try:
            if not repo:
                pr = self.repo.get_pullrequest(number)
            else:
                pr = repo.get_pullrequest(number)
        except Exception as e:
            # best-effort: treat any lookup failure as "not merged"
            print(e)
        if pr:
            merged = pr.merged
        return merged

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging'''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info('dumping {}'.format(fn))
        with open(fn, 'wb') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
class DefaultTriager:
    """
    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done updating 'actions' instance
        3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    CLOSING_LABELS = []

    def __init__(self, args=None):
        """Parse CLI args, configure logging and build the API clients.

        Statement order matters here: logging is configured before the
        wrappers emit messages, and the resume checkpoint is read before
        start_at is overridden.
        """
        parser = self.create_parser()
        self.args = parser.parse_args(args)

        logging.info('starting bot')
        self.set_logger()

        self.cachedir_base = os.path.expanduser(self.args.cachedir_base)
        # per-repo GraphQL summary cache, filled by update_issue_summaries()
        self.issue_summaries = {}
        # repo name -> state dict, built lazily by _collect_repo()
        self.repos = {}

        # resume is just an overload for the start-at argument
        resume = self.get_resume()
        if resume:
            if self.args.sort == 'desc':
                self.args.start_at = resume['number'] - 1
            else:
                self.args.start_at = resume['number'] + 1

        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(url=C.DEFAULT_GITHUB_URL,
                                 user=C.DEFAULT_GITHUB_USERNAME,
                                 passw=C.DEFAULT_GITHUB_PASSWORD,
                                 token=C.DEFAULT_GITHUB_TOKEN,
                                 cachedir=self.cachedir_base)

        logging.info('creating graphql client')
        self.gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN,
                                        server=C.DEFAULT_GITHUB_URL)

        # lazily populated by the maintainer_team property
        self._maintainer_team = None

    @property
    def maintainer_team(self):
        """Sorted maintainer logins, with known bot accounts removed."""
        # NOTE: assumes the bot token can inspect team membership across
        # potentially more than one organization.
        if self._maintainer_team is None:
            collected = []
            for entry in C.DEFAULT_GITHUB_MAINTAINERS:
                org_name, team_name = entry.split('/')
                collected.extend(self.ghw.get_members(org_name, team_name))
            self._maintainer_team = collected
        humans = set(self._maintainer_team).difference(C.DEFAULT_BOT_NAMES)
        return sorted(humans)

    @classmethod
    def create_parser(cls):
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--ignore_state",
                            action="store_true",
                            help="Do not skip processing closed issues")
        parser.add_argument("--last",
                            type=int,
                            help="triage the last N issues or PRs")
        parser.add_argument("--only_closed",
                            action="store_true",
                            help="Triage closed issues|prs only")
        parser.add_argument("--only_issues",
                            action="store_true",
                            help="Triage issues only")
        parser.add_argument("--only_prs",
                            action="store_true",
                            help="Triage pullrequests only")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument(
            "--pr",
            "--id",
            type=str,
            help="Triage only the specified pr|issue (separated by commas)")
        parser.add_argument(
            "--resume",
            action="store_true",
            dest="resume_enabled",
            help="pickup right after where the bot last stopped")
        parser.add_argument("--repo",
                            "-r",
                            type=str,
                            help="Github repo to triage (defaults to all)")
        parser.add_argument("--skiprepo",
                            action='append',
                            help="Github repo to skip triaging")
        parser.add_argument("--start-at",
                            type=int,
                            help="Start triage at the specified pr|issue")
        parser.add_argument("--sort",
                            default='desc',
                            choices=['asc', 'desc'],
                            help="Direction to sort issues [desc=9-0 asc=0-9]")
        return parser

    def set_logger(self):
        """Configure logging from CLI flags.

        Delegates to the module-level set_logger() helper; the method name
        intentionally shadows it so subclasses keep a stable hook.
        """
        set_logger(debug=self.args.debug, logfile=self.args.logfile)

    def start(self):
        if self.args.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    def loop(self):
        """Invoke run() forever, sleeping daemonize_interval seconds between passes."""
        while True:
            self.run()
            delay = self.args.daemonize_interval
            logging.info('sleep %ss (%sm)' % (delay, delay / 60))
            time.sleep(delay)

    @abc.abstractmethod
    def run(self):
        """Process issues/PRs for one pass; must be implemented by subclasses."""
        pass

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' jinja2 template with the given vars."""
        tmpl = environment.get_template('%s.j2' % boilerplate)
        return tmpl.render(**tvars)

    def apply_actions(self, iw, actions):
        """Apply (or interactively confirm) the proposed actions for an issue.

        Returns a dict with a single 'REDO' flag telling the caller whether
        to re-process the same issue.
        """
        action_meta = {'REDO': False}

        if actions.count() > 0:
            if self.args.dump_actions:
                self.dump_action_dict(iw, actions.__dict__)

            if self.args.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.args.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                # interactive confirmation: yes / no / abort / redo / debug
                cont = input("Take recommended actions (y/N/a/R/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.args.always_pause:
            # nothing to do, but --pause was requested
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""
        for commentid in actions.uncomment:
            iw.remove_comment_by_id(commentid)

        for comment in actions.comments:
            # NOTE(review): 'acton' typo in this runtime log string
            logging.info("acton: comment - " + comment)
            iw.add_comment(comment=comment)

        if actions.close:
            # when closing, only labels listed in CLOSING_LABELS are applied
            for newlabel in actions.newlabel:
                if newlabel in self.CLOSING_LABELS:
                    logging.info('action: label - ' + newlabel)
                    iw.add_label(label=newlabel)

            logging.info('action: close')
            iw.instance.edit(state='closed')

        else:
            for unlabel in actions.unlabel:
                logging.info('action: unlabel - ' + unlabel)
                iw.remove_label(label=unlabel)
            for newlabel in actions.newlabel:
                logging.info('action: label - ' + newlabel)
                iw.add_label(label=newlabel)

            # merging only happens when the issue is not being closed
            if actions.merge:
                iw.merge()

    def dump_action_dict(self, issue, actions):
        """Serialize the action dict to disk for quick(er) debugging"""
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info(f'dumping {fn}')
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))

    def get_resume(self):
        '''Returns a dict with the last issue repo+number processed'''
        if self.args.pr or not self.args.resume_enabled:
            return

        resume_file = os.path.join(self.cachedir_base, 'resume.json')
        if not os.path.isfile(resume_file):
            logging.error('Resume: %r not found', resume_file)
            return None

        logging.debug('Resume: read %r', resume_file)
        with open(resume_file, 'r', encoding='utf-8') as f:
            data = json.loads(f.read())
        return data

    def set_resume(self, repo, number):
        if self.args.pr or not self.args.resume_enabled:
            return

        data = {'repo': repo, 'number': number}
        resume_file = os.path.join(self.cachedir_base, 'resume.json')
        with open(resume_file, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    def eval_pr_param(self, pr):
        '''PR/ID can be a number, numberlist, script, jsonfile, or url'''

        if isinstance(pr, list):
            pass

        elif pr.isdigit():
            pr = int(pr)

        elif pr.startswith('http'):
            rr = requests.get(pr)
            numbers = rr.json()
            pr = numbers[:]

        elif os.path.isfile(pr) and not os.access(pr, os.X_OK):
            with open(pr) as f:
                numbers = json.loads(f.read())
            pr = numbers[:]

        elif os.path.isfile(pr) and os.access(pr, os.X_OK):
            # allow for scripts when trying to target spec issues
            logging.info('executing %s' % pr)
            (rc, so, se) = run_command(pr)
            numbers = json.loads(to_text(so))
            if numbers:
                if isinstance(numbers[0], dict) and 'number' in numbers[0]:
                    numbers = [x['number'] for x in numbers]
                else:
                    numbers = [int(x) for x in numbers]
            logging.info('%s numbers after running script' % len(numbers))
            pr = numbers[:]

        elif ',' in pr:
            numbers = [int(x) for x in pr.split(',')]
            pr = numbers[:]

        if not isinstance(pr, list):
            pr = [pr]

        return pr

    def update_issue_summaries(self, repopath=None, issuenums=None):
        """Refresh the GraphQL summary cache for a repo.

        Small targeted runs (<= 10 numbers) are fetched one by one; anything
        larger pulls the full summary set in one query.
        """
        if not issuenums or len(issuenums) > 10:
            self.issue_summaries[repopath] = self.gqlc.get_issue_summaries(
                repopath)
            return

        self.issue_summaries[repopath] = {}
        for num in issuenums:
            # --pr is an alias to --id and can also be for issues
            node = self.gqlc.get_summary(repopath, 'pullRequest', num)
            if node is None:
                node = self.gqlc.get_summary(repopath, 'issue', num)
            if node is not None:
                self.issue_summaries[repopath][to_text(num)] = node

    def get_stale_numbers(self, reponame):
        """Return open issue numbers whose cached meta.json is missing,
        corrupt, or older than the stale window and need re-processing."""
        stale = []
        for number, summary in self.issue_summaries[reponame].items():
            # NOTE(review): 'number' is a str key here while 'stale' holds
            # ints, so this membership test never matches — confirm intent
            if number in stale:
                continue

            if summary['state'] == 'closed':
                continue

            number = int(number)
            mfile = os.path.join(self.cachedir_base, reponame, 'issues',
                                 to_text(number), 'meta.json')

            # no cached meta at all -> needs processing
            if not os.path.isfile(mfile):
                stale.append(number)
                continue

            try:
                with open(mfile, 'rb') as f:
                    meta = json.load(f)
            except ValueError as e:
                # corrupt cache entry: drop it and reprocess the issue
                logging.error('failed to parse %s: %s' %
                              (to_text(mfile), to_text(e)))
                os.remove(mfile)
                stale.append(number)
                continue

            # days since the meta was last written
            delta = (datetime.datetime.now() -
                     strip_time_safely(meta['time'])).days
            if delta > C.DEFAULT_STALE_WINDOW:
                stale.append(number)

        stale = sorted({int(x) for x in stale})
        if 10 >= len(stale) > 0:
            logging.info('stale: %s' % ','.join([to_text(x) for x in stale]))

        return stale

    @RateLimited
    def _collect_repo(self, repo, issuenums=None):
        '''Collect issues for an individual repo'''
        logging.info('getting repo obj for %s' % repo)
        if repo not in self.repos:
            gitrepo = GitRepoWrapper(
                cachedir=self.cachedir_base,
                repo=f'https://github.com/{repo}',
                commit=self.args.ansible_commit,
            )
            self.repos[repo] = {
                'repo': self.ghw.get_repo(repo),
                'issues': [],
                'processed': [],
                'since': None,
                'stale': [],
                'loopcount': 0,
                'labels': self.ghw.get_valid_labels(repo),
                'gitrepo': gitrepo,
            }
        else:
            # force a clean repo object to limit caching problems
            logging.info('updating repo')
            self.repos[repo]['repo'] = self.ghw.get_repo(repo)
            logging.info('updating checkout')
            self.repos[repo]['gitrepo'].update()

            # clear the issues
            self.repos[repo]['issues'] = {}
            # increment the loopcount
            self.repos[repo]['loopcount'] += 1

        logging.info('getting issue objs for %s' % repo)
        self.update_issue_summaries(repopath=repo, issuenums=issuenums)

        issuecache = {}
        numbers = self.issue_summaries[repo].keys()
        numbers = {int(x) for x in numbers}
        if issuenums:
            numbers.intersection_update(issuenums)
            numbers = list(numbers)
        logging.info('%s known numbers' % len(numbers))

        if self.args.daemonize:

            if not self.repos[repo]['since']:
                ts = [
                    x[1]['updated_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['updated_at']
                ]
                ts += [
                    x[1]['created_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['created_at']
                ]
                ts = sorted(set(ts))
                if ts:
                    self.repos[repo]['since'] = ts[-1]
            else:
                since = strip_time_safely(self.repos[repo]['since'])
                api_since = self.repos[repo]['repo'].get_issues(since=since)

                numbers = []
                for x in api_since:
                    numbers.append(x.number)
                    issuecache[x.number] = x

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [api] since == %s' %
                             (len(numbers), since))

                for k, v in self.issue_summaries[repo].items():
                    if v['created_at'] is None:
                        # issue is closed and was never processed
                        continue

                    if v['created_at'] > self.repos[repo]['since']:
                        numbers.append(k)

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [www] since == %s' %
                             (len(numbers), since))

        if self.args.start_at and self.repos[repo]['loopcount'] == 0:
            numbers = [x for x in numbers if x <= self.args.start_at]
            logging.info('%s numbers after start-at' % len(numbers))

        # Get stale numbers if not targeting
        if self.args.daemonize and self.repos[repo]['loopcount'] > 0:
            logging.info('checking for stale numbers')
            stale = self.get_stale_numbers(repo)
            self.repos[repo]['stale'] = [int(x) for x in stale]
            numbers += [int(x) for x in stale]
            numbers = sorted(set(numbers))
            logging.info('%s numbers after stale check' % len(numbers))

        ################################################################
        # PRE-FILTERING TO PREVENT EXCESSIVE API CALLS
        ################################################################

        # filter just the open numbers
        if not self.args.only_closed and not self.args.ignore_state:
            numbers = [
                x for x in numbers
                if (to_text(x) in self.issue_summaries[repo] and
                    self.issue_summaries[repo][to_text(x)]['state'] == 'open')
            ]
            logging.info('%s numbers after checking state' % len(numbers))

        # filter by type
        if self.args.only_issues:
            numbers = [
                x for x in numbers
                if self.issue_summaries[repo][to_text(x)]['type'] == 'issue'
            ]
            logging.info('%s numbers after checking type' % len(numbers))
        elif self.args.only_prs:
            numbers = [
                x for x in numbers if self.issue_summaries[repo][to_text(x)]
                ['type'] == 'pullrequest'
            ]
            logging.info('%s numbers after checking type' % len(numbers))

        numbers = sorted({int(x) for x in numbers})
        if self.args.sort == 'desc':
            numbers = [x for x in reversed(numbers)]

        if self.args.last and len(numbers) > self.args.last:
            numbers = numbers[0 - self.args.last:]

        # Use iterator to avoid requesting all issues upfront
        self.repos[repo]['issues'] = RepoIssuesIterator(
            self.repos[repo]['repo'], numbers, issuecache=issuecache)

        logging.info('getting repo objs for %s complete' % repo)

    def collect_repos(self):
        '''Populate the local cache of repos'''
        logging.info('start collecting repos')
        args = self.args
        for repo in C.DEFAULT_GITHUB_REPOS:
            # honor the --repo / --skiprepo cli filters
            if args.repo and args.repo != repo:
                continue
            if args.skiprepo and repo in args.skiprepo:
                continue

            # --pr narrows collection down to specific issue numbers
            if args.pr:
                self._collect_repo(repo, issuenums=self.eval_pr_param(args.pr))
            else:
                self._collect_repo(repo)
        logging.info('finished collecting issues')
Exemple #19
0
    def __init__(self):
        """Scrape ansible/ansible issues and report @-mention response latency.

        Walks every known issue number, records when users were @-mentioned
        in comments and when each of them first responded, then emits the
        aggregate via report(). Raw API responses are cached on disk.
        """
        self.gh = GithubWrapper(None, token=C.DEFAULT_GITHUB_TOKEN)

        # on-disk cache for raw API responses
        self.cachedir = '/tmp/pings.cache'
        if not os.path.exists(self.cachedir):
            os.makedirs(self.cachedir)

        # url -> mention/response metadata for a single issue
        byissue = {}

        numbers = self.get_numbers()
        for idn, number in enumerate(numbers):

            logging.info('%s|%s issue %s' % (len(numbers), idn + 1, number))

            # cap the run size (debugging aid)
            if idn > 7000:
                break

            issue = self.get_issue(number)

            # skip malformed/missing api payloads
            if 'url' not in issue:
                continue

            url = issue['url']
            labels = [x['name'] for x in issue['labels']]
            login = issue['user']['login']

            byissue[url] = {
                'login': login,
                'team': set(),
                'mentions': {},
                'mentioned': None,
                'responded': None,
                'bug': 'bug' in labels,
                'feature': 'feature' in labels,
                'pull': 'pull' in issue['html_url']
            }

            comments = self.get_comments(number)
            if not comments:
                continue

            # first pass: record the earliest @-mention of each user
            for comment in comments:

                if comment is None:
                    # unexpected payload; drop into the debugger
                    import epdb
                    epdb.st()
                if comment['user'] is None:
                    # deleted/ghost accounts have no login
                    continue

                login = comment['user']['login']
                mentions = self.parse_mentions(comment['body'])

                if mentions:
                    for mention in mentions:
                        byissue[url]['team'].add(mention)

                        if mention not in byissue[url]['mentions']:
                            byissue[url]['mentions'][mention] = {
                                'mentioned': comment['created_at'],
                                'responded': None
                            }
                        # keep only the earliest mention timestamp
                        if comment['created_at'] < byissue[url]['mentions'][
                                mention]['mentioned']:
                            byissue[url]['mentions'][mention][
                                'mentioned'] = comment['created_at']

                    # earliest time anyone was mentioned on this issue
                    if byissue[url]['mentioned'] is None or \
                            byissue[url]['mentioned'] > comment['created_at']:
                        byissue[url]['mentioned'] = comment['created_at']

            # second pass: record the earliest response from each
            # mentioned user
            for comment in comments:
                if comment is None:
                    import epdb
                    epdb.st()
                if comment['user'] is None:
                    continue

                login = comment['user']['login']

                if login in byissue[url]['team']:
                    # earliest response from anyone who was mentioned
                    if byissue[url]['responded'] is None or \
                            byissue[url]['responded'] > comment['created_at']:
                        byissue[url]['responded'] = comment['created_at']

                    # BUGFIX: index by the responding commenter's login, not
                    # the stale 'mention' variable left over from the first
                    # pass ('team' is exactly the set of mention keys, so
                    # 'login' is guaranteed to be a valid key here)
                    if byissue[url]['mentions'][login]['responded'] is None or \
                            byissue[url]['mentions'][login]['responded'] > comment['created_at']:
                        byissue[url]['mentions'][login][
                            'responded'] = comment['created_at']

        report(byissue)
Exemple #20
0
class Scraper:
    """Scrape ansible/ansible issues and report @-mention response latency.

    Fetches every known issue number, records when users were @-mentioned
    in comments and when each of them first responded, then emits the
    aggregate via report(). All raw API responses are cached on disk.
    """

    def __init__(self):
        self.gh = GithubWrapper(None, token=C.DEFAULT_GITHUB_TOKEN)

        # on-disk cache for raw API responses
        self.cachedir = '/tmp/pings.cache'
        if not os.path.exists(self.cachedir):
            os.makedirs(self.cachedir)

        # url -> mention/response metadata for a single issue
        byissue = {}

        numbers = self.get_numbers()
        for idn, number in enumerate(numbers):

            logging.info('%s|%s issue %s' % (len(numbers), idn + 1, number))

            # cap the run size (debugging aid)
            if idn > 7000:
                break

            issue = self.get_issue(number)

            # skip malformed/missing api payloads
            if 'url' not in issue:
                continue

            url = issue['url']
            labels = [x['name'] for x in issue['labels']]
            login = issue['user']['login']

            byissue[url] = {
                'login': login,
                'team': set(),
                'mentions': {},
                'mentioned': None,
                'responded': None,
                'bug': 'bug' in labels,
                'feature': 'feature' in labels,
                'pull': 'pull' in issue['html_url']
            }

            comments = self.get_comments(number)
            if not comments:
                continue

            # first pass: record the earliest @-mention of each user
            for comment in comments:

                if comment is None:
                    # unexpected payload; drop into the debugger
                    import epdb
                    epdb.st()
                if comment['user'] is None:
                    # deleted/ghost accounts have no login
                    continue

                login = comment['user']['login']
                mentions = self.parse_mentions(comment['body'])

                if mentions:
                    for mention in mentions:
                        byissue[url]['team'].add(mention)

                        if mention not in byissue[url]['mentions']:
                            byissue[url]['mentions'][mention] = {
                                'mentioned': comment['created_at'],
                                'responded': None
                            }
                        # keep only the earliest mention timestamp
                        if comment['created_at'] < byissue[url]['mentions'][
                                mention]['mentioned']:
                            byissue[url]['mentions'][mention][
                                'mentioned'] = comment['created_at']

                    # earliest time anyone was mentioned on this issue
                    if byissue[url]['mentioned'] is None or \
                            byissue[url]['mentioned'] > comment['created_at']:
                        byissue[url]['mentioned'] = comment['created_at']

            # second pass: record the earliest response from each
            # mentioned user
            for comment in comments:
                if comment is None:
                    import epdb
                    epdb.st()
                if comment['user'] is None:
                    continue

                login = comment['user']['login']

                if login in byissue[url]['team']:
                    # earliest response from anyone who was mentioned
                    if byissue[url]['responded'] is None or \
                            byissue[url]['responded'] > comment['created_at']:
                        byissue[url]['responded'] = comment['created_at']

                    # BUGFIX: index by the responding commenter's login, not
                    # the stale 'mention' variable left over from the first
                    # pass ('team' is exactly the set of mention keys, so
                    # 'login' is guaranteed to be a valid key here)
                    if byissue[url]['mentions'][login]['responded'] is None or \
                            byissue[url]['mentions'][login]['responded'] > comment['created_at']:
                        byissue[url]['mentions'][login][
                            'responded'] = comment['created_at']

        report(byissue)

    def get_numbers(self):
        """Return all known issue numbers, newest first (GraphQL, cached)."""
        gq_cache_file = os.path.join(self.cachedir, 'gql_cache.json')

        if not os.path.exists(gq_cache_file):
            gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
            summaries = gqlc.get_issue_summaries('ansible/ansible')
            with open(gq_cache_file, 'w') as f:
                f.write(json.dumps(summaries))
        else:
            with open(gq_cache_file, 'r') as f:
                summaries = json.loads(f.read())

        numbers = set()
        for k, v in summaries.items():
            numbers.add(v['number'])
        numbers = sorted(numbers, reverse=True)
        return numbers

    def get_issue(self, number):
        """Fetch (or read from cache) a single issue payload."""
        issue_url = 'https://api.github.com/repos/ansible/ansible/issues/%s' % number
        issue = self.get_url(issue_url)
        return issue

    def get_comments(self, number):
        """Fetch issue comments, plus review comments for pull requests."""
        issue_url = 'https://api.github.com/repos/ansible/ansible/issues/%s' % number
        issue = self.get_url(issue_url)
        comments_url = 'https://api.github.com/repos/ansible/ansible/issues/%s/comments' % number
        comments = self.get_url(comments_url)

        reviews = []
        if 'pull' in issue['html_url']:
            pull = self.get_url(issue['pull_request']['url'])
            if pull['review_comments'] > 0:
                reviews = self.get_url(pull['review_comments_url'])

        return comments + reviews

    def get_url(self, url):
        """GET a url through the API wrapper, caching the response by md5."""
        cachedir = os.path.join(self.cachedir, 'requests')
        if not os.path.exists(cachedir):
            os.makedirs(cachedir)
        m = hashlib.md5()
        m.update(url.encode('utf-8'))
        digest = m.hexdigest()

        cachefile = os.path.join(cachedir, '%s.json' % digest)
        if not os.path.exists(cachefile):
            data = self.gh.get_request(url)
            with open(cachefile, 'w') as f:
                f.write(json.dumps(data))
        else:
            with open(cachefile, 'r') as f:
                data = json.loads(f.read())

        return data

    def parse_mentions(self, body):
        """Return the list of GitHub logins @-mentioned in a comment body.

        Tokens containing quoting, parens, slashes or braces are discarded
        as likely false positives (code snippets, paths, jinja templates).
        """
        mentioned = set()

        if '@' in body:
            for word in body.split():
                if not word.startswith('@'):
                    continue
                login = word.replace('@', '')
                if not login.strip():
                    continue
                # discard likely false positives
                if any(c in login for c in ('"', "'", '(', ')', '/', '\\', '{')):
                    continue
                login = login.rstrip(',')

                if login:
                    mentioned.add(login)

        return list(mentioned)
Exemple #21
0
class DefaultTriager:
    """
    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done updating 'actions' instance
        3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    # number of completed daemonize loop iterations
    ITERATION = 0
    debug = False
    cachedir_base = None
    BOTNAMES = C.DEFAULT_BOT_NAMES
    # labels that should still be applied when an issue is being closed
    CLOSING_LABELS = []

    def __init__(self, args=None):
        pass

    @classmethod
    def create_parser(cls):
        """Creates an argument parser

        Returns:
            A argparse.ArgumentParser object
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--verbose",
                            "-v",
                            action="store_true",
                            help="Verbose output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument("--force_rate_limit",
                            action="store_true",
                            help="debug: force the rate limit")
        # useful for debugging
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        return parser

    def set_logger(self):
        """Configure the module logger from the parsed cli options."""
        set_logger(debug=self.debug, logfile=self.logfile)

    def start(self):
        """Entry point: run once, loop forever, or force the rate limit."""
        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if self.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API, preferring token over user/password."""
        if self.github_token:
            return Github(base_url=self.github_url,
                          login_or_token=self.github_token)
        else:
            return Github(base_url=self.github_url,
                          login_or_token=self.github_user,
                          password=self.github_pass)

    def is_pr(self, issue):
        """Return True when the object is a pull request (by html url)."""
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        """Return True when the object is a plain issue, not a PR."""
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle.load(f)
            members = mdata[1]
            # refresh when the org changed after the cache was written
            if mdata[0] < gh_org.updated_at:
                update = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]
            # BUGFIX: also persist refreshed data when a stale cache file
            # already existed; previously write_cache stayed False in that
            # path, so the cache could never be updated after first write
            write_cache = True

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle.dump(mdata, f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Get members of the core team

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A list of GitHub login belonging to teams
        """
        members = set()

        conn = self._connect()
        gh_org = conn.get_organization(organization)
        for team in gh_org.get_teams():
            if team.name in teams:
                for member in team.get_members():
                    members.add(member.login)

        return sorted(members)

    #@RateLimited
    def get_valid_labels(self, repo):
        """Return the names of all labels defined on a repository."""

        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        rw = self.ghw.get_repo(repo)
        vlabels = []
        for vl in rw.labels:
            vlabels.append(vl.name)

        return vlabels

    def loop(self):
        '''Call the run method in a defined interval'''
        while True:
            self.run()
            self.ITERATION += 1
            interval = self.daemonize_interval
            logging.info('sleep %ss (%sm)' % (interval, interval / 60))
            time.sleep(interval)

    @abc.abstractmethod
    def run(self):
        """Process issues/PRs; must be implemented by subclasses."""
        pass

    def get_current_time(self):
        # NOTE(review): returns a naive UTC datetime; compared against
        # PyGithub's (also naive UTC) timestamps in get_members
        return datetime.utcnow()

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the named jinja2 boilerplate template with tvars."""
        template = environment.get_template('%s.j2' % boilerplate)
        comment = template.render(**tvars)
        return comment

    def apply_actions(self, iw, actions):
        """Confirm (interactively unless forced) and execute actions.

        Returns:
            dict with key 'REDO' -- True when the caller should
            reprocess the same issue
        """
        action_meta = {'REDO': False}

        if actions.count() > 0:
            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = input("Take recommended actions (y/N/a/R/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.always_pause:
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""

        for commentid in actions.uncomment:
            iw.remove_comment_by_id(commentid)

        for comment in actions.comments:
            # BUGFIX: log message typo ('acton' -> 'action')
            logging.info("action: comment - " + comment)
            iw.add_comment(comment=comment)

        if actions.close:
            # only closing-related labels are applied before closing
            for newlabel in actions.newlabel:
                if newlabel in self.CLOSING_LABELS:
                    logging.info('action: label - ' + newlabel)
                    iw.add_label(label=newlabel)

            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')

        else:
            for unlabel in actions.unlabel:
                logging.info('action: unlabel - ' + unlabel)
                iw.remove_label(label=unlabel)
            for newlabel in actions.newlabel:
                logging.info('action: label - ' + newlabel)
                iw.add_label(label=newlabel)

            for user in actions.assign:
                logging.info('action: assign - ' + user)
                iw.assign_user(user)

            for user in actions.unassign:
                logging.info('action: unassign - ' + user)
                iw.unassign_user(user)

            if actions.merge:
                iw.merge()

        # FIXME why?
        self.build_history(iw)

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Check if a PR# has been merged or not'''

        if number is None:
            raise Exception('Can not check merge state on the number: None')

        merged = False
        pr = None
        try:
            pr = repo.get_pullrequest(number)
        except Exception as e:
            print(e)
        if pr:
            try:
                merged = pr.merged
            except Exception as e:
                logging.debug(e)
        return merged

    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit'''

        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        # intentionally loops forever; the decorated api calls will
        # eventually hit the rate limit and exercise the handler
        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            thisrepo = self.ghw.get_repo(self.repo, verbose=False)
            issues = thisrepo.repo.get_issues()
            rl = thisrepo.get_rate_limit()
            pprint(rl)

            for issue in issues:
                iw = IssueWrapper(github=self.ghw,
                                  repo=thisrepo,
                                  issue=issue,
                                  cachedir=cachedir)
                iw.history
                rl = thisrepo.get_rate_limit()
                pprint(rl)

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging'''
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          to_text(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info(f'dumping {fn}')
        # BUGFIX: json.dumps returns str, so the file must be opened in
        # text mode; 'wb' raised TypeError on every write
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
Exemple #22
0
class DefaultTriager(object):
    """
    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done updating 'actions' instance
        3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    ITERATION = 0

    def __init__(self):
        # parse the CLI options and mirror every one of them onto the instance
        parser = self.create_parser()
        args = parser.parse_args()
        for key, value in vars(args).items():
            setattr(self, key, value)

        self.last_run = None

        # github credentials come from the global configuration module
        self.github_url = C.DEFAULT_GITHUB_URL
        self.github_user = C.DEFAULT_GITHUB_USERNAME
        self.github_pass = C.DEFAULT_GITHUB_PASSWORD
        self.github_token = C.DEFAULT_GITHUB_TOKEN

        # where to store junk
        self.cachedir_base = os.path.expanduser(self.cachedir_base)

        self.set_logger()
        logging.info('starting bot')

        # connect to github
        logging.info('creating api connection')
        self.gh = self._connect()

        # wrap the connection
        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(self.gh, cachedir=self.cachedir_base)

    @classmethod
    def create_parser(cls):
        """Creates an argument parser

        Returns:
            A argparse.ArgumentParser object
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--cachedir", type=str, dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--logfile", type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--daemonize", action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval", type=int, default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug", "-d", action="store_true",
                            help="Debug output")
        parser.add_argument("--verbose", "-v", action="store_true",
                            help="Verbose output")
        parser.add_argument("--dry-run", "-n", action="store_true",
                            help="Don't make any changes")
        parser.add_argument("--force", "-f", action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--pause", "-p", action="store_true", dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument("--force_rate_limit", action="store_true",
                            help="debug: force the rate limit")
        parser.add_argument("--force_description_fixer", action="store_true",
                            help="Always invoke the description fixer")
        # useful for debugging
        parser.add_argument("--dump_actions", action="store_true",
                            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument("--botmetafile", type=str,
                            default=None,
                            help="Use this filepath for botmeta instead of from the repo")
        return parser

    def set_logger(self):
        if self.debug:
            logging.level = logging.DEBUG
        else:
            logging.level = logging.INFO
        logFormatter = \
            logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        rootLogger = logging.getLogger()
        if self.debug:
            rootLogger.setLevel(logging.DEBUG)
        else:
            rootLogger.setLevel(logging.INFO)

        logdir = os.path.dirname(self.logfile)
        if logdir and not os.path.isdir(logdir):
            os.makedirs(logdir)

        fileHandler = WatchedFileHandler(self.logfile)
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)

    def start(self):

        if self.force_rate_limit:
            logging.warning('attempting to trigger rate limit')
            self.trigger_rate_limit()
            return

        if self.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    @RateLimited
    def _connect(self):
        """Connects to GitHub's API, preferring token auth over user/password."""
        if self.github_token:
            credentials = {'login_or_token': self.github_token}
        else:
            credentials = {
                'login_or_token': self.github_user,
                'password': self.github_pass,
            }
        return Github(base_url=self.github_url, **credentials)

    def is_pr(self, issue):
        """Return True when the issue object is actually a pull request."""
        return '/pull/' in issue.html_url

    def is_issue(self, issue):
        """Return True when *issue* is a plain issue rather than a pull request."""
        return not self.is_pr(issue)

    @RateLimited
    def get_members(self, organization):
        """Get members of an organization

        Results are pickled to disk and only refreshed when the organization's
        `updated_at` is newer than the cached timestamp.

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle_load(f)
            members = mdata[1]
            if mdata[0] < gh_org.updated_at:
                update = True
        else:
            update = True

        if update:
            members = [x.login for x in gh_org.get_members()]
            # BUGFIX: persist whenever the list was (re)fetched; previously a
            # stale cache was refreshed in memory but never rewritten to disk
            write_cache = True

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle_dump(mdata, f)

        return members

    @RateLimited
    def get_core_team(self, organization, teams):
        """Get members of the core team

        Args:
            organization: name of the teams' organization
            teams: list of teams that compose the project core team

        Returns:
            A list of GitHub login belonging to teams
        """
        conn = self._connect()
        gh_org = conn.get_organization(organization)

        # union of the logins of every listed team
        logins = {
            member.login
            for team in gh_org.get_teams()
            if team.name in teams
            for member in team.get_members()
        }
        return sorted(logins)

    #@RateLimited
    def get_valid_labels(self, repo):

        # use the repo wrapper to enable caching+updating
        if not self.ghw:
            self.gh = self._connect()
            self.ghw = GithubWrapper(self.gh)

        rw = self.ghw.get_repo(repo)
        vlabels = []
        for vl in rw.get_labels():
            vlabels.append(vl.name)

        return vlabels

    def loop(self):
        '''Call the run method in a defined interval'''
        while True:
            self.run()
            self.ITERATION += 1
            delay = self.daemonize_interval
            logging.info('sleep %ss (%sm)' % (delay, delay / 60))
            time.sleep(delay)

    @abc.abstractmethod
    def run(self):
        """Perform one triage pass; must be implemented by subclasses."""
        pass

    def get_current_time(self):
        # Naive UTC timestamp. get_members() compares this value against the
        # organization's `updated_at` (presumably also naive UTC), so do not
        # switch to an aware datetime without auditing that comparison.
        return datetime.utcnow()

    def render_boilerplate(self, tvars, boilerplate=None):
        """Render the '<boilerplate>.j2' jinja2 template with *tvars* and return the text."""
        return environment.get_template('%s.j2' % boilerplate).render(**tvars)

    def apply_actions(self, iw, actions):
        """Execute (or interactively confirm) the computed actions for an issue.

        Returns:
            dict with a single 'REDO' flag; True tells the caller to
            re-process the same issue from scratch.
        """

        action_meta = {'REDO': False}

        if actions.count() > 0:

            if self.dump_actions:
                self.dump_action_dict(iw, actions)

            if self.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = input("Take recommended actions (y/N/a/R/T/DEBUG)? ")
                # NOTE: these are deliberately sequential `if`s (not elif);
                # any answer not listed simply falls through and skips
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont == 'T':
                    self.template_wizard(iw)
                    action_meta['REDO'] = True
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb; epdb.st()
        elif self.always_pause:
            # no actions were computed, but --pause asks to stop on every issue
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/T/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            if cont == 'T':
                self.template_wizard(iw)
                action_meta['REDO'] = True
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb; epdb.st()
                action_meta['REDO'] = True
        elif self.force_description_fixer:
            # FIXME: self.FIXED_ISSUES not defined since 1cf9674cd38edbd17aff906d72296c99043e5c13
            #        either define self.FIXED_ISSUES, either remove this method
            # FIXME force_description_fixer is not known by DefaultTriager (only
            #       by AnsibleTriage): if not removed, move it to AnsibleTriage
            if iw.html_url not in self.FIXED_ISSUES:
                if self.meta['template_missing_sections']:
                    changed = self.template_wizard(iw)
                    if changed:
                        action_meta['REDO'] = True
                self.FIXED_ISSUES.append(iw.html_url)
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def template_wizard(self, iw):
        """Show a side-by-side diff of the issue body vs. the description
        proposed by DescriptionFixer and apply the new one on request.

        Returns:
            True when the new description was applied, False otherwise
        """
        DF = DescriptionFixer(iw, self.meta)

        old_lines = iw.body.split('\n')
        new_lines = DF.new_description.split('\n')

        # pad the shorter column so both sides have the same number of rows
        # (BUGFIX: the original used `xrange`, a NameError under Python 3)
        total_lines = max(len(old_lines), len(new_lines))
        new_lines += [''] * (total_lines - len(new_lines))
        old_lines += [''] * (total_lines - len(old_lines))

        line = '--------------------------------------------------------'
        padding = 100
        print("%s|%s" % (line.ljust(padding), line))
        for c1, c2 in zip(old_lines, new_lines):
            # truncate over-long rows so the two columns stay aligned
            if len(c1) > padding:
                c1 = c1[:padding-4]
            if len(c2) > padding:
                c2 = c2[:padding-4]
            print("%s|%s" % (c1.rstrip().ljust(padding), c2.rstrip()))
        print("%s|%s" % (line.rstrip().ljust(padding), line))

        print('# ' + iw.html_url)
        cont = input("Apply this new description? (Y/N) ")
        # accept lowercase too, matching the y/Y handling in apply_actions
        if cont in ('Y', 'y'):
            iw.set_description(DF.new_description)
            return True
        return False

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls

        Closing the issue short-circuits every remaining action, since
        labeling/assigning a just-closed issue would be pointless.
        """
        for comment in actions.comments:
            # BUGFIX: log message previously misspelled as "acton"
            logging.info("action: comment - " + comment)
            iw.add_comment(comment=comment)

        if actions.close:
            # https://github.com/PyGithub/PyGithub/blob/master/github/Issue.py#L263
            logging.info('action: close')
            iw.instance.edit(state='closed')
            return

        for unlabel in actions.unlabel:
            logging.info('action: unlabel - ' + unlabel)
            iw.remove_label(label=unlabel)
        for newlabel in actions.newlabel:
            logging.info('action: label - ' + newlabel)
            iw.add_label(label=newlabel)

        for user in actions.assign:
            logging.info('action: assign - ' + user)
            iw.assign_user(user)

        for user in actions.unassign:
            logging.info('action: unassign - ' + user)
            iw.unassign_user(user)

        if actions.merge:
            iw.merge()

    #@RateLimited
    def is_pr_merged(self, number, repo):
        '''Check if a PR# has been merged or not'''

        if number is None:
            if C.DEFAULT_BREAKPOINTS:
                logging.error('breakpoint!')
                import epdb; epdb.st()
            raise Exception('Can not check merge state on the number: None')

        # fetch the PR; a failed lookup is reported but treated as "not merged"
        pr = None
        try:
            pr = repo.get_pullrequest(number)
        except Exception as e:
            print(e)

        if not pr:
            return False

        try:
            return pr.merged
        except Exception as e:
            logging.debug(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error('breakpoint!')
                import epdb; epdb.st()
            return False

    def trigger_rate_limit(self):
        '''Repeatedly make calls to exhaust rate limit'''

        # fresh connection + wrapper so the requests below really hit the API
        self.gh = self._connect()
        self.ghw = GithubWrapper(self.gh)

        while True:
            cachedir = os.path.join(self.cachedir_base, self.repo)
            repo_wrapper = self.ghw.get_repo(self.repo, verbose=False)
            issues = repo_wrapper.repo.get_issues()
            pprint(repo_wrapper.get_rate_limit())

            for issue in issues:
                iw = IssueWrapper(github=self.ghw,
                                  repo=repo_wrapper,
                                  issue=issue,
                                  cachedir=cachedir)
                # touching .history forces a burst of API requests
                iw.history
                pprint(repo_wrapper.get_rate_limit())

    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging

        Writes /tmp/actions/<repo_full_name>/<number>.json with the
        pretty-printed action dict.
        '''
        fn = os.path.join(u'/tmp', u'actions', issue.repo_full_name, to_text(issue.number) + u'.json')
        dn = os.path.dirname(fn)
        os.makedirs(dn, exist_ok=True)

        logging.info('dumping {}'.format(fn))
        # json.dumps returns str, so the file must be opened in text mode;
        # the previous 'wb' mode raised TypeError on the write() call
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))