Example #2
    def get_numbers(self):
        gq_cache_file = os.path.join(self.cachedir, 'gql_cache.json')

        if not os.path.exists(gq_cache_file):
            gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
            summaries = gqlc.get_issue_summaries('ansible/ansible')
            with open(gq_cache_file, 'w') as f:
                f.write(json.dumps(summaries))
        else:
            with open(gq_cache_file, 'r') as f:
                summaries = json.loads(f.read())

        numbers = set()
        for k, v in summaries.items():
            #if v['state'] != 'open':
            #    continue
            numbers.add(v['number'])
        numbers = sorted(numbers, reverse=True)
        return numbers
Example #3
def main():

    set_logger()

    METAFILES = extract_metafiles()

    SKIP = load_skip()
    EXPECTED = load_expected()
    MATCH_MAP = load_match_map()

    ERRORS = []
    ERRORS_COMPONENTS = []

    start_at = None
    if len(sys.argv) == 2:
        start_at = int(sys.argv[1])

    FI = FileIndexer(checkoutdir=CACHEDIR)
    with open('/tmp/files.json', 'w') as f:
        f.write(json.dumps(FI.files, indent=2))
    GQLC = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
    MI = ModuleIndexer(cachedir=CACHEDIR,
                       gh_client=GQLC,
                       blames=False,
                       commits=False)

    CM = AnsibleComponentMatcher(cachedir=CACHEDIR)

    for k, v in MI.modules.items():
        MATCH_MAP.pop(k, None)
        kname = v.get('name')
        if kname not in MATCH_MAP:
            MATCH_MAP[kname] = v.get('repo_filename')
        if kname + ' module' not in MATCH_MAP:
            MATCH_MAP[kname + ' module'] = v.get('repo_filename')
        if 'module: ' + kname not in MATCH_MAP:
            MATCH_MAP['module: ' + kname] = v.get('repo_filename')
        if 'module ' + kname not in MATCH_MAP:
            MATCH_MAP['module ' + kname] = v.get('repo_filename')

        # /modules/remote_management/foreman/katello.py
        pname = k.replace('lib/ansible', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

        # ansible/modules/packaging/os/rpm_key.py
        pname = k.replace('lib/', '/')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

        # /ansible/modules/packaging/os/rpm_key.py
        pname = k.replace('lib/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

        # ansible/lib/ansible/modules/monitoring/monit.py
        pname = 'ansible/' + k
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

        # network/f5/bigip_gtm_wide_ip
        pname = k.replace('lib/ansible/modules/', '')
        pname = pname.replace('.py', '')
        pname = pname.replace('.ps1', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

        # network/f5/bigip_gtm_wide_ip.py
        pname = k.replace('lib/ansible/modules/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

        # modules/packaging/os/pkgng.py
        pname = k.replace('lib/ansible/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

    save_match_map(MATCH_MAP)

    total = len(METAFILES)
    for IDMF, MF in enumerate(METAFILES):

        if start_at and IDMF < start_at:
            continue

        with open(MF, 'rb') as f:
            meta = json.loads(f.read())

        if not meta.get('is_issue'):
            continue

        component = meta.get('template_data', {}).get('component_raw')

        #if component != 'Module `synchronize`':
        #if component != 'Module: include_role':
        #    continue

        if component:
            print(f'------------------------------------------ {total}|{IDMF}')
            print(meta['html_url'])
            print(meta['title'])
            print(component)

            hurl = meta['html_url']
            if hurl in SKIP:
                continue

            # bad template or bad template parsing
            if len(component) > 100:
                continue

            iw = IssueWrapperMock(meta)
            if ('module' not in iw.body.lower()
                    and 'module' not in iw.title.lower()):
                continue

            expected_fns = []

            # OLD METHOD
            if hurl not in EXPECTED and component not in MATCH_MAP:
                cmf = get_component_match_facts(iw, meta, FI, MI, LABELS)
                expected_fns = cmf.get('module_match')
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
                expected_fns = [x['repo_filename'] for x in expected_fns if x]
                if 'component_matches' in cmf:
                    expected_fns = [
                        x['filename'] for x in cmf['component_matches']
                    ]
                expected_fns = sorted(set(expected_fns))

            # NEW METHOD
            cmr = CM.match_components(iw.title, iw.body,
                                      iw.template_data.get('component_raw'))
            cmr_fns = [x['repo_filename'] for x in cmr if x]
            cmr_fns = sorted(set(cmr_fns))

            # VALIDATE FROM EXPECTED IF KNOWN
            if hurl in EXPECTED:
                if EXPECTED[hurl] and not isinstance(EXPECTED[hurl], list):
                    expected_fns = [EXPECTED[hurl]]
                elif EXPECTED[hurl]:
                    expected_fns = EXPECTED[hurl]
                else:
                    expected_fns = []

            # USE THE CACHED MAP
            if component in MATCH_MAP:
                expected_fns = MATCH_MAP[component]
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
            elif component.lower() in MATCH_MAP:
                expected_fns = MATCH_MAP[component.lower()]
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
            elif component.startswith(':\n') and component.endswith(' module'):
                mapkey = component.lstrip(':\n')
                if mapkey in MATCH_MAP:
                    expected_fns = MATCH_MAP[mapkey]
                    if not isinstance(expected_fns, list):
                        expected_fns = [expected_fns]

            # OLD CODE USED ACTION PLUGINS INSTEAD OF MODULES
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if (len(expected_fns) == 1 and len(cmr_fns) == 1
                        and 'plugins/action' in expected_fns[0]):
                    e_bn = os.path.basename(expected_fns[0])
                    c_bn = os.path.basename(cmr_fns[0])
                    if e_bn == c_bn:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue

            # DOCS URLS
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if len(cmr_fns) == 1 and 'lib/ansible/modules' in cmr_fns[0]:
                    c_bn = os.path.basename(cmr_fns[0])
                    if f'docs.ansible.com/ansible/latest/{c_bn}_module.html' in component:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue
                    elif CM.strategy in ['search_by_regex_urls']:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue

            # NXOS ISSUES HAVE NXOS_VERSION HEADER
            if '- nxos' in component:
                if len(cmr_fns) == 1:
                    if os.path.basename(cmr_fns[0]).replace('.py', '') in component:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue
                #import epdb; epdb.st()

            # ODDBALL MODULE COMPONENTS
            if len(cmr_fns) == 1 and 'lib/ansible/modules' in cmr_fns[0]:
                bn = os.path.basename(cmr_fns[0])
                bn = bn.replace('.py', '')
                bn = bn.replace('.ps1', '')
                if ((bn in component or bn.lstrip('_') in component)
                        and 'module' in component.lower()):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == '- ' + bn:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == bn + '.py' or component == bn + '.ps1':
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == '_' + bn + '.py' or component == '_' + bn + '.ps1':
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component in (':\n' + bn, ':\n' + bn.lstrip('_')):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # 'multiple modules', etc ...
            if component in CM.KEYWORDS or component.lower() in CM.KEYWORDS:
                if (component in CM.KEYWORDS
                        and CM.KEYWORDS[component] is None and not cmr_fns):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif (component.lower() in CM.KEYWORDS
                        and CM.KEYWORDS[component.lower()] is None
                        and not cmr_fns):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif (len(cmr_fns) == 1
                        and cmr_fns[0] == CM.KEYWORDS.get(component)):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif (len(cmr_fns) == 1
                        and cmr_fns[0] == CM.KEYWORDS.get(component.lower())):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            cname = component.lstrip('-').strip()
            if cname in CM.KEYWORDS and len(cmr_fns) == 1:
                if CM.KEYWORDS[cname] == cmr_fns[0]:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if (component.endswith(' lookup') and len(cmr_fns) == 1
                    and 'lib/ansible/plugins/lookup' in cmr_fns[0]):
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if (component.endswith(' inventory script') and len(cmr_fns) == 1
                    and 'contrib/inventory' in cmr_fns[0]):
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if component.startswith('ansible/lib') and len(cmr_fns) == 1:
                fn = cmr_fns[0]
                if 'ansible/' + fn == component:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.endswith(' inventory plugin') and len(cmr_fns) == 1:
                fn = cmr_fns[0]
                if fn.startswith('lib/ansible/plugins/inventory'):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component == 'ec2.py' and cmr_fns and 'contrib/inventory/ec2.py' in cmr_fns:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if len(expected_fns) == 1 and len(cmr_fns) == 1:
                if os.path.basename(expected_fns[0]) == os.path.basename(
                        cmr_fns[0]):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # COMPARE AND RECORD
            if expected_fns != cmr_fns and hurl not in EXPECTED:

                if component in MATCH_MAP or component.lower() in MATCH_MAP:
                    if component.lower() in MATCH_MAP:
                        mmc = MATCH_MAP[component.lower()]
                    else:
                        mmc = MATCH_MAP[component]
                    if not isinstance(mmc, list):
                        mmc = [mmc]
                    if mmc == cmr_fns:
                        EXPECTED[iw.html_url] = cmr_fns
                        save_expected(EXPECTED)
                        continue

                print('## COMPONENT ...')
                print(component)
                print('## EXPECTED ...')
                pprint(expected_fns)
                print('## RESULT ...')
                pprint(cmr_fns)
                print('## STRATEGIES ..')
                pprint(CM.strategy)
                pprint(CM.strategies)

                print('--------------------------------')
                res = input('Is the result correct? (y/n/s/d): ')
                if res.lower() in ['y', 'yes']:
                    MATCH_MAP[component] = cmr_fns
                    EXPECTED[iw.html_url] = cmr_fns
                    save_expected(EXPECTED)
                    continue
                elif res.lower() in ['s', 'skip']:
                    SKIP.append(hurl)
                    save_skip(SKIP)
                    continue
                elif res.lower() in ['d', 'debug']:
                    import epdb
                    epdb.st()

                ERRORS.append(iw.html_url)
                ERRORS_COMPONENTS.append({
                    'url': iw.html_url,
                    'component': component,
                    'component_raw': iw.template_data.get('component_raw'),
                    'result': cmr_fns,
                    'expected': expected_fns,
                    'strategy': CM.strategy,
                    'strategies': CM.strategies,
                })

            else:

                if component not in MATCH_MAP:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)

                if hurl not in EXPECTED:
                    EXPECTED[hurl] = cmr_fns
                    save_expected(EXPECTED)

            continue

    pprint(ERRORS)
    fn = os.path.join(FIXTUREDIR, 'component_errors.json')
    with open(fn, 'w') as f:
        f.write(json.dumps(ERRORS_COMPONENTS, indent=2, sort_keys=True))

    clean_metafiles(METAFILES)
Example #4
def main():

    logFormatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    rootLogger = logging.getLogger()
    rootLogger.setLevel(logging.DEBUG)
    consoleHandler = logging.StreamHandler()
    consoleHandler.setFormatter(logFormatter)
    rootLogger.addHandler(consoleHandler)

    summaries = None
    gq_cache_file = '/tmp/gql_cache.json'

    if not os.path.exists(gq_cache_file):
        gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
        summaries = gqlc.get_issue_summaries('ansible/ansible')
        with open(gq_cache_file, 'w') as f:
            f.write(json.dumps(summaries))
    else:
        with open(gq_cache_file, 'r') as f:
            summaries = json.loads(f.read())

    numbers = set()
    for k, v in summaries.items():
        if v['state'] != 'open':
            continue
        numbers.add(v['number'])
    numbers = sorted(numbers, reverse=True)

    gh = GithubWrapper(None, token=C.DEFAULT_GITHUB_TOKEN)

    for idn, number in enumerate(numbers):
        logging.info('%s|%s issue %s' % (len(numbers), idn + 1, number))

        if number > 52979:
            continue

        comments_url = 'https://api.github.com/repos/ansible/ansible/issues/%s/comments' % number
        comments = gh.get_request(comments_url)

        duplicates = {}
        for comment in comments:
            if comment['user']['login'] != 'ansibot':
                continue
            if comment['body'] not in duplicates:
                duplicates[comment['body']] = []
            duplicates[comment['body']].append(comment['id'])

        if duplicates:
            topop = []
            for k, v in duplicates.items():
                if len(v) <= 1:
                    topop.append(k)
            for tp in topop:
                duplicates.pop(tp, None)

            if duplicates:
                for k, v in duplicates.items():
                    dupes = [x for x in comments if x['id'] in v]
                    dupes = sorted(dupes, key=lambda x: x['created_at'])

                    pprint([[x['id'], x['body']] for x in dupes])

                    #if '<!--- boilerplate: notify --->' not in dupes[0]['body']:
                    #    continue

                    #import epdb; epdb.st()

                    for dupe in dupes[1:]:
                        gh.delete_request(dupe['url'])
                    time.sleep(1)
class DefaultTriager:
    """
    How to use:
    1. Create a new class which inherits from DefaultTriager
    2. Implement 'Triager.run(self)' method:
        - iterate over issues/pull requests
        - for each issue
        1. create 'actions = DefaultActions()'
        2. define which action(s) should be done updating 'actions' instance
        3. call parent 'apply_actions' methods: 'DefaultTriager.apply_actions(iw, actions)'
    3. Run:
    def main():
        Triager().start()
    """
    CLOSING_LABELS = []

    def __init__(self, args=None):
        parser = self.create_parser()
        self.args = parser.parse_args(args)

        logging.info('starting bot')
        self.set_logger()

        self.cachedir_base = os.path.expanduser(self.args.cachedir_base)
        self.issue_summaries = {}
        self.repos = {}

        # resume is just an overload for the start-at argument
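        # e.g. with --sort=desc and a saved resume number of 500, the next
        # run starts at 499 (number is illustrative)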
        resume = self.get_resume()
        if resume:
            if self.args.sort == 'desc':
                self.args.start_at = resume['number'] - 1
            else:
                self.args.start_at = resume['number'] + 1

        logging.info('creating api wrapper')
        self.ghw = GithubWrapper(url=C.DEFAULT_GITHUB_URL,
                                 user=C.DEFAULT_GITHUB_USERNAME,
                                 passw=C.DEFAULT_GITHUB_PASSWORD,
                                 token=C.DEFAULT_GITHUB_TOKEN,
                                 cachedir=self.cachedir_base)

        logging.info('creating graphql client')
        self.gqlc = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN,
                                        server=C.DEFAULT_GITHUB_URL)

        self._maintainer_team = None

    @property
    def maintainer_team(self):
        # Note: this assumes that the token used by the bot has access to check
        # team privileges across potentially more than one organization
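        # entries are 'org/team' paths, as split below; e.g. a hypothetical
        # C.DEFAULT_GITHUB_MAINTAINERS = ['ansible/community-team']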
        if self._maintainer_team is None:
            self._maintainer_team = []
            teams = C.DEFAULT_GITHUB_MAINTAINERS
            for team in teams:
                _org, _team = team.split('/')
                self._maintainer_team.extend(self.ghw.get_members(_org, _team))
        return sorted(
            set(self._maintainer_team).difference(C.DEFAULT_BOT_NAMES))

    @classmethod
    def create_parser(cls):
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--botmetafile",
            type=str,
            default=None,
            help="Use this filepath for botmeta instead of from the repo")
        parser.add_argument("--cachedir",
                            type=str,
                            dest='cachedir_base',
                            default='~/.ansibullbot/cache')
        parser.add_argument("--daemonize",
                            action="store_true",
                            help="run in a continuos loop")
        parser.add_argument("--daemonize_interval",
                            type=int,
                            default=(30 * 60),
                            help="seconds to sleep between loop iterations")
        parser.add_argument("--debug",
                            "-d",
                            action="store_true",
                            help="Debug output")
        parser.add_argument("--dry-run",
                            "-n",
                            action="store_true",
                            help="Don't make any changes")
        parser.add_argument(
            "--dump_actions",
            action="store_true",
            help="serialize the actions to disk [/tmp/actions]")
        parser.add_argument("--force",
                            "-f",
                            action="store_true",
                            help="Do not ask questions")
        parser.add_argument("--logfile",
                            type=str,
                            default='/var/log/ansibullbot.log',
                            help="Send logging to this file")
        parser.add_argument("--ignore_state",
                            action="store_true",
                            help="Do not skip processing closed issues")
        parser.add_argument("--last",
                            type=int,
                            help="triage the last N issues or PRs")
        parser.add_argument("--only_closed",
                            action="store_true",
                            help="Triage closed issues|prs only")
        parser.add_argument("--only_issues",
                            action="store_true",
                            help="Triage issues only")
        parser.add_argument("--only_prs",
                            action="store_true",
                            help="Triage pullrequests only")
        parser.add_argument("--pause",
                            "-p",
                            action="store_true",
                            dest="always_pause",
                            help="Always pause between prs|issues")
        parser.add_argument(
            "--pr",
            "--id",
            type=str,
            help="Triage only the specified pr|issue (separated by commas)")
        parser.add_argument(
            "--resume",
            action="store_true",
            dest="resume_enabled",
            help="pickup right after where the bot last stopped")
        parser.add_argument("--repo",
                            "-r",
                            type=str,
                            help="Github repo to triage (defaults to all)")
        parser.add_argument("--skiprepo",
                            action='append',
                            help="Github repo to skip triaging")
        parser.add_argument("--start-at",
                            type=int,
                            help="Start triage at the specified pr|issue")
        parser.add_argument("--sort",
                            default='desc',
                            choices=['asc', 'desc'],
                            help="Direction to sort issues [desc=9-0 asc=0-9]")
        return parser

    def set_logger(self):
        set_logger(debug=self.args.debug, logfile=self.args.logfile)

    def start(self):
        if self.args.daemonize:
            logging.info('starting daemonize loop')
            self.loop()
        else:
            logging.info('starting single run')
            self.run()
        logging.info('stopping bot')

    def loop(self):
        """Call the run method in a defined interval"""
        while True:
            self.run()
            interval = self.args.daemonize_interval
            logging.info('sleep %ss (%sm)' % (interval, interval // 60))
            time.sleep(interval)

    @abc.abstractmethod
    def run(self):
        pass

    def render_boilerplate(self, tvars, boilerplate=None):
        template = environment.get_template('%s.j2' % boilerplate)
        comment = template.render(**tvars)
        return comment

    def apply_actions(self, iw, actions):
        action_meta = {'REDO': False}

        if actions.count() > 0:
            if self.args.dump_actions:
                self.dump_action_dict(iw, actions.__dict__)

            if self.args.dry_run:
                print("Dry-run specified, skipping execution of actions")
            else:
                if self.args.force:
                    print("Running actions non-interactive as you forced.")
                    self.execute_actions(iw, actions)
                    return action_meta
                cont = input("Take recommended actions (y/N/a/R/DEBUG)? ")
                if cont in ('a', 'A'):
                    sys.exit(0)
                if cont in ('Y', 'y'):
                    self.execute_actions(iw, actions)
                if cont in ('r', 'R'):
                    action_meta['REDO'] = True
                if cont == 'DEBUG':
                    # put the user into a breakpoint to do live debug
                    action_meta['REDO'] = True
                    import epdb
                    epdb.st()
        elif self.args.always_pause:
            print("Skipping, but pause.")
            cont = input("Continue (Y/n/a/R/DEBUG)? ")
            if cont in ('a', 'A', 'n', 'N'):
                sys.exit(0)
            elif cont in ('r', 'R'):
                action_meta['REDO'] = True
            elif cont == 'DEBUG':
                # put the user into a breakpoint to do live debug
                import epdb
                epdb.st()
                action_meta['REDO'] = True
        else:
            print("Skipping.")

        # let the upper level code redo this issue
        return action_meta

    def execute_actions(self, iw, actions):
        """Turns the actions into API calls"""
        for commentid in actions.uncomment:
            iw.remove_comment_by_id(commentid)

        for comment in actions.comments:
            logging.info("acton: comment - " + comment)
            iw.add_comment(comment=comment)

        if actions.close:
            for newlabel in actions.newlabel:
                if newlabel in self.CLOSING_LABELS:
                    logging.info('action: label - ' + newlabel)
                    iw.add_label(label=newlabel)

            logging.info('action: close')
            iw.instance.edit(state='closed')

        else:
            for unlabel in actions.unlabel:
                logging.info('action: unlabel - ' + unlabel)
                iw.remove_label(label=unlabel)
            for newlabel in actions.newlabel:
                logging.info('action: label - ' + newlabel)
                iw.add_label(label=newlabel)

            if actions.merge:
                iw.merge()

    def dump_action_dict(self, issue, actions):
        """Serialize the action dict to disk for quick(er) debugging"""
        fn = os.path.join('/tmp', 'actions', issue.repo_full_name,
                          str(issue.number) + '.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info(f'dumping {fn}')
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))

    def get_resume(self):
        '''Returns a dict with the last issue repo+number processed'''
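        # the payload mirrors set_resume below,
        # e.g. {'repo': 'ansible/ansible', 'number': 12345} (illustrative)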
        if self.args.pr or not self.args.resume_enabled:
            return

        resume_file = os.path.join(self.cachedir_base, 'resume.json')
        if not os.path.isfile(resume_file):
            logging.error('Resume: %r not found', resume_file)
            return None

        logging.debug('Resume: read %r', resume_file)
        with open(resume_file, 'r', encoding='utf-8') as f:
            data = json.loads(f.read())
        return data

    def set_resume(self, repo, number):
        if self.args.pr or not self.args.resume_enabled:
            return

        data = {'repo': repo, 'number': number}
        resume_file = os.path.join(self.cachedir_base, 'resume.json')
        with open(resume_file, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    def eval_pr_param(self, pr):
        '''PR/ID can be a number, numberlist, script, jsonfile, or url'''
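        # For illustration (hypothetical inputs), every accepted form is
        # normalized to a list of issue/PR numbers:
        #   '123'            -> [123]
        #   '1,2,3'          -> [1, 2, 3]
        #   '/tmp/nums.json' -> contents of the JSON file (a list of numbers)
        #   'https://...'    -> JSON body of the response (a list of numbers)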

        if isinstance(pr, list):
            pass

        elif pr.isdigit():
            pr = int(pr)

        elif pr.startswith('http'):
            rr = requests.get(pr)
            numbers = rr.json()
            pr = numbers[:]

        elif os.path.isfile(pr) and not os.access(pr, os.X_OK):
            with open(pr) as f:
                numbers = json.loads(f.read())
            pr = numbers[:]

        elif os.path.isfile(pr) and os.access(pr, os.X_OK):
            # allow for scripts when trying to target spec issues
            logging.info('executing %s' % pr)
            (rc, so, se) = run_command(pr)
            numbers = json.loads(to_text(so))
            if numbers:
                if isinstance(numbers[0], dict) and 'number' in numbers[0]:
                    numbers = [x['number'] for x in numbers]
                else:
                    numbers = [int(x) for x in numbers]
            logging.info('%s numbers after running script' % len(numbers))
            pr = numbers[:]

        elif ',' in pr:
            numbers = [int(x) for x in pr.split(',')]
            pr = numbers[:]

        if not isinstance(pr, list):
            pr = [pr]

        return pr

    def update_issue_summaries(self, repopath=None, issuenums=None):
        if issuenums and len(issuenums) <= 10:
            self.issue_summaries[repopath] = {}

            for num in issuenums:
                # --pr is an alias to --id and can also be for issues
                node = self.gqlc.get_summary(repopath, 'pullRequest', num)
                if node is None:
                    node = self.gqlc.get_summary(repopath, 'issue', num)
                if node is not None:
                    self.issue_summaries[repopath][to_text(num)] = node
        else:
            self.issue_summaries[repopath] = self.gqlc.get_issue_summaries(
                repopath)

    def get_stale_numbers(self, reponame):
        stale = []
        for number, summary in self.issue_summaries[reponame].items():
            if number in stale:
                continue

            if summary['state'] == 'closed':
                continue

            number = int(number)
            mfile = os.path.join(self.cachedir_base, reponame, 'issues',
                                 to_text(number), 'meta.json')

            if not os.path.isfile(mfile):
                stale.append(number)
                continue

            try:
                with open(mfile, 'rb') as f:
                    meta = json.load(f)
            except ValueError as e:
                logging.error('failed to parse %s: %s' %
                              (to_text(mfile), to_text(e)))
                os.remove(mfile)
                stale.append(number)
                continue

            delta = (datetime.datetime.now() -
                     strip_time_safely(meta['time'])).days
            if delta > C.DEFAULT_STALE_WINDOW:
                stale.append(number)

        stale = sorted({int(x) for x in stale})
        if 10 >= len(stale) > 0:
            logging.info('stale: %s' % ','.join([to_text(x) for x in stale]))

        return stale

    @RateLimited
    def _collect_repo(self, repo, issuenums=None):
        '''Collect issues for an individual repo'''
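        # 'repo' is an 'org/name' path such as 'ansible/ansible'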
        logging.info('getting repo obj for %s' % repo)
        if repo not in self.repos:
            gitrepo = GitRepoWrapper(
                cachedir=self.cachedir_base,
                repo=f'https://github.com/{repo}',
                commit=self.args.ansible_commit,
            )
            self.repos[repo] = {
                'repo': self.ghw.get_repo(repo),
                'issues': [],
                'processed': [],
                'since': None,
                'stale': [],
                'loopcount': 0,
                'labels': self.ghw.get_valid_labels(repo),
                'gitrepo': gitrepo,
            }
        else:
            # force a clean repo object to limit caching problems
            logging.info('updating repo')
            self.repos[repo]['repo'] = self.ghw.get_repo(repo)
            logging.info('updating checkout')
            self.repos[repo]['gitrepo'].update()

            # clear the issues
            self.repos[repo]['issues'] = {}
            # increment the loopcount
            self.repos[repo]['loopcount'] += 1

        logging.info('getting issue objs for %s' % repo)
        self.update_issue_summaries(repopath=repo, issuenums=issuenums)

        issuecache = {}
        numbers = self.issue_summaries[repo].keys()
        numbers = {int(x) for x in numbers}
        if issuenums:
            numbers.intersection_update(issuenums)
            numbers = list(numbers)
        logging.info('%s known numbers' % len(numbers))

        if self.args.daemonize:

            if not self.repos[repo]['since']:
                ts = [
                    x[1]['updated_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['updated_at']
                ]
                ts += [
                    x[1]['created_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['created_at']
                ]
                ts = sorted(set(ts))
                if ts:
                    self.repos[repo]['since'] = ts[-1]
            else:
                since = strip_time_safely(self.repos[repo]['since'])
                api_since = self.repos[repo]['repo'].get_issues(since=since)

                numbers = []
                for x in api_since:
                    numbers.append(x.number)
                    issuecache[x.number] = x

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [api] since == %s' %
                             (len(numbers), since))

                for k, v in self.issue_summaries[repo].items():
                    if v['created_at'] is None:
                        # issue is closed and was never processed
                        continue

                    if v['created_at'] > self.repos[repo]['since']:
                        numbers.append(k)

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [www] since == %s' %
                             (len(numbers), since))

        if self.args.start_at and self.repos[repo]['loopcount'] == 0:
            numbers = [x for x in numbers if x <= self.args.start_at]
            logging.info('%s numbers after start-at' % len(numbers))

        # Get stale numbers if not targeting
        if self.args.daemonize and self.repos[repo]['loopcount'] > 0:
            logging.info('checking for stale numbers')
            stale = self.get_stale_numbers(repo)
            self.repos[repo]['stale'] = [int(x) for x in stale]
            numbers += [int(x) for x in stale]
            numbers = sorted(set(numbers))
            logging.info('%s numbers after stale check' % len(numbers))

        ################################################################
        # PRE-FILTERING TO PREVENT EXCESSIVE API CALLS
        ################################################################

        # filter just the open numbers
        if not self.args.only_closed and not self.args.ignore_state:
            numbers = [
                x for x in numbers
                if (to_text(x) in self.issue_summaries[repo] and
                    self.issue_summaries[repo][to_text(x)]['state'] == 'open')
            ]
            logging.info('%s numbers after checking state' % len(numbers))

        # filter by type
        if self.args.only_issues:
            numbers = [
                x for x in numbers
                if self.issue_summaries[repo][to_text(x)]['type'] == 'issue'
            ]
            logging.info('%s numbers after checking type' % len(numbers))
        elif self.args.only_prs:
            numbers = [
                x for x in numbers
                if self.issue_summaries[repo][to_text(x)]['type'] == 'pullrequest'
            ]
            logging.info('%s numbers after checking type' % len(numbers))

        numbers = sorted({int(x) for x in numbers})
        if self.args.sort == 'desc':
            numbers = list(reversed(numbers))

        if self.args.last and len(numbers) > self.args.last:
            numbers = numbers[-self.args.last:]

        # Use iterator to avoid requesting all issues upfront
        self.repos[repo]['issues'] = RepoIssuesIterator(
            self.repos[repo]['repo'], numbers, issuecache=issuecache)

        logging.info('getting repo objs for %s complete' % repo)

    def collect_repos(self):
        '''Populate the local cache of repos'''
        logging.info('start collecting repos')
        for repo in C.DEFAULT_GITHUB_REPOS:
            # skip repos based on args
            if self.args.repo and self.args.repo != repo:
                continue
            if self.args.skiprepo:
                if repo in self.args.skiprepo:
                    continue

            if self.args.pr:
                numbers = self.eval_pr_param(self.args.pr)
                self._collect_repo(repo, issuenums=numbers)
            else:
                self._collect_repo(repo)
        logging.info('finished collecting issues')
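
Below is a minimal, hypothetical sketch of the subclassing pattern described in the DefaultTriager docstring. It assumes a DefaultActions class whose attributes match those consumed by execute_actions above (comments, newlabel, etc.); the label value and iteration details are illustrative only.

class Triager(DefaultTriager):

    def run(self):
        # gather repo objects and issue iterators (see collect_repos above)
        self.collect_repos()
        for repo in self.repos:
            for iw in self.repos[repo]['issues']:
                # decide what should happen to this issue/PR
                actions = DefaultActions()
                actions.newlabel.append('needs_triage')  # illustrative label
                self.apply_actions(iw, actions)
                self.set_resume(repo, iw.number)


def main():
    Triager().start()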