コード例 #1
0
ファイル: galaxy.py プロジェクト: sasubbar/ansibullbot
    def index_ecosystem(self):
        # index the ansible-collections org
        token = C.DEFAULT_GITHUB_TOKEN
        gh = Github(login_or_token=token)
        gw = GithubWrapper(gh, cachedir=self.cachedir)
        ac = gw.get_org('ansible-collections')

        cloneurls = set()
        for repo in ac.get_repos():
            #print(repo)
            cloneurls.add(repo.clone_url)
        cloneurls = [x.replace('.git', '') for x in cloneurls]

        for curl in cloneurls:
            if curl.endswith('/overview'):
                continue
            if curl.endswith('/collection_template'):
                continue
            if curl.endswith('/.github'):
                continue
            if curl.endswith('/hub'):
                continue
            grepo = GitRepoWrapper(cachedir=self.cachedir,
                                   repo=curl,
                                   rebase=False)

            # is there a galaxy.yml at the root level?
            if grepo.exists('galaxy.yml'):
                meta = yaml.load(grepo.get_file_content('galaxy.yml'))
                fqcn = '%s.%s' % (meta['namespace'], meta['name'])
                self._gitrepos[fqcn] = grepo
            else:
                # multi-collection repos ... sigh.
                galaxyfns = grepo.find('galaxy.yml')

                if galaxyfns:
                    for gfn in galaxyfns:
                        meta = yaml.load(grepo.get_file_content(gfn))
                        fqcn = '%s.%s' % (meta['namespace'], meta['name'])
                        _grepo = GitRepoWrapper(cachedir=self.cachedir,
                                                repo=curl,
                                                rebase=False,
                                                context=os.path.dirname(gfn))
                        self._gitrepos[fqcn] = _grepo
                else:

                    fqcn = None
                    bn = os.path.basename(curl)

                    # enumerate the url?
                    if '.' in bn:
                        fqcn = bn

                    # try the README?
                    if fqcn is None:
                        for fn in ['README.rst', 'README.md']:
                            if fqcn:
                                break
                            if not grepo.exists(fn):
                                continue
                            fdata = grepo.get_file_content(fn)
                            if not '.' in fdata:
                                continue
                            lines = fdata.split('\n')
                            for line in lines:
                                line = line.strip()
                                if line.lower().startswith(
                                        'ansible collection:'):
                                    fqcn = line.split(':')[-1].strip()
                                    break

                    # lame ...
                    if fqcn is None:
                        fqcn = bn + '._community'

                    self._gitrepos[fqcn] = grepo

        # scrape the galaxy collections api
        nexturl = self._baseurl + '/api/v2/collections/?page_size=1000'
        while nexturl:
            jdata = self._get_cached_url(nexturl)
            nexturl = jdata.get('next_link')
            if nexturl:
                nexturl = self._baseurl + nexturl

            for res in jdata.get('results', []):
                fqcn = '%s.%s' % (res['namespace']['name'], res['name'])
                if res.get('deprecated'):
                    continue
                if fqcn in self._gitrepos:
                    continue
                lv = res['latest_version']['href']
                lvdata = self._get_cached_url(lv)
                rurl = lvdata.get('metadata', {}).get('repository')
                if rurl is None:
                    rurl = lvdata['download_url']
                grepo = GitRepoWrapper(cachedir=self.cachedir,
                                       repo=rurl,
                                       rebase=False)
                self._gitrepos[fqcn] = grepo

        # reconcile all things ...
        self.GALAXY_FQCNS = sorted(set(self._gitrepos.keys()))
        self.GALAXY_FILES = {}
        for fqcn, gr in self._gitrepos.items():
            if fqcn.startswith('testing.'):
                continue
            for fn in gr.files:
                if fn not in self.GALAXY_FILES:
                    self.GALAXY_FILES[fn] = set()
                self.GALAXY_FILES[fn].add(fqcn)
コード例 #2
0
class AnsibleComponentMatcher(object):

    BOTMETA = {}
    INDEX = {}
    REPO = 'https://github.com/ansible/ansible'
    STOPWORDS = ['ansible', 'core', 'plugin']
    STOPCHARS = ['"', "'", '(', ')', '?', '*', '`', ',', ':', '?', '-']
    BLACKLIST = ['new module', 'new modules']
    FILE_NAMES = []
    MODULES = {}
    MODULE_NAMES = []
    MODULE_NAMESPACE_DIRECTORIES = []

    # FIXME: THESE NEED TO GO INTO BOTMETA
    # ALSO SEE search_by_regex_generic ...
    KEYWORDS = {
        'all': None,
        'ansiballz': 'lib/ansible/executor/module_common.py',
        'ansible-console': 'lib/ansible/cli/console.py',
        'ansible-galaxy': 'lib/ansible/galaxy',
        'ansible-inventory': 'lib/ansible/cli/inventory.py',
        'ansible-playbook': 'lib/ansible/playbook',
        'ansible playbook': 'lib/ansible/playbook',
        'ansible playbooks': 'lib/ansible/playbook',
        'ansible-pull': 'lib/ansible/cli/pull.py',
        'ansible-vault': 'lib/ansible/parsing/vault',
        'ansible-vault edit': 'lib/ansible/parsing/vault',
        'ansible-vault show': 'lib/ansible/parsing/vault',
        'ansible-vault decrypt': 'lib/ansible/parsing/vault',
        'ansible-vault encrypt': 'lib/ansible/parsing/vault',
        'async': 'lib/ansible/modules/utilities/logic/async_wrapper.py',
        'become': 'lib/ansible/playbook/become.py',
        'block': 'lib/ansible/playbook/block.py',
        'blocks': 'lib/ansible/playbook/block.py',
        'callback plugin': 'lib/ansible/plugins/callback',
        'callback plugins': 'lib/ansible/plugins/callback',
        'conditional': 'lib/ansible/playbook/conditional.py',
        'docs': 'docs',
        'delegate_to': 'lib/ansible/playbook/task.py',
        'facts': 'lib/ansible/module_utils/facts',
        'galaxy': 'lib/ansible/galaxy',
        'groupvars': 'lib/ansible/vars/hostvars.py',
        'group vars': 'lib/ansible/vars/hostvars.py',
        'handlers': 'lib/ansible/playbook/handler.py',
        'hostvars': 'lib/ansible/vars/hostvars.py',
        'host vars': 'lib/ansible/vars/hostvars.py',
        'integration tests': 'test/integration',
        'inventory script': 'contrib/inventory',
        'jinja2 template system': 'lib/ansible/template',
        'module_utils': 'lib/ansible/module_utils',
        'multiple modules': None,
        'new module(s) request': None,
        'new modules request': None,
        'new module request': None,
        'new module': None,
        'network_cli': 'lib/ansible/plugins/connection/network_cli.py',
        'network_cli.py': 'lib/ansible/plugins/connection/network_cli.py',
        'network modules': 'lib/ansible/modules/network',
        'paramiko': 'lib/ansible/plugins/connection/paramiko_ssh.py',
        'role': 'lib/ansible/playbook/role',
        'roles': 'lib/ansible/playbook/role',
        'ssh': 'lib/ansible/plugins/connection/ssh.py',
        'ssh authentication': 'lib/ansible/plugins/connection/ssh.py',
        'setup / facts': 'lib/ansible/modules/system/setup.py',
        'setup': 'lib/ansible/modules/system/setup.py',
        'task executor': 'lib/ansible/executor/task_executor.py',
        'testing': 'test/',
        'validate-modules': 'test/sanity/validate-modules',
        'vault': 'lib/ansible/parsing/vault',
        'vault edit': 'lib/ansible/parsing/vault',
        'vault documentation': 'lib/ansible/parsing/vault',
        'with_items': 'lib/ansible/playbook/loop_control.py',
        'windows modules': 'lib/ansible/modules/windows',
        'winrm': 'lib/ansible/plugins/connection/winrm.py'
    }

    def __init__(self, gitrepo=None, botmetafile=None, cachedir=None, email_cache=None, file_indexer=None):
        self.cachedir = cachedir
        self.botmetafile = botmetafile
        self.email_cache = email_cache

        if file_indexer:
            self.file_indexer = file_indexer
        else:
            self.file_indexer = FileIndexer(
                botmetafile=self.botmetafile,
                checkoutdir=self.cachedir
            )

        if gitrepo:
            self.gitrepo = gitrepo
        else:
            self.gitrepo = GitRepoWrapper(cachedir=self.cachedir, repo=self.REPO)

        self.strategy = None
        self.strategies = []

        self.indexed_at = False
        self.updated_at = None
        self.update()

    def update(self, email_cache=None):
        if email_cache:
            self.email_cache = email_cache
        self.gitrepo.update()
        self.index_files()
        self.indexed_at = datetime.datetime.now()
        self.cache_keywords()
        self.updated_at = datetime.datetime.now()

    def index_files(self):

        self.BOTMETA = {}
        self.MODULES = {}
        self.MODULE_NAMES = []
        self.MODULE_NAMESPACE_DIRECTORIES = []

        self.load_meta()

        for fn in self.gitrepo.module_files:
            if os.path.isdir(fn):
                continue
            mname = os.path.basename(fn)
            mname = mname.replace('.py', '').replace('.ps1', '')
            if mname.startswith('__'):
                continue
            mdata = {
                'name': mname,
                'repo_filename': fn,
                'filename': fn
            }
            if fn not in self.MODULES:
                self.MODULES[fn] = mdata.copy()
            else:
                self.MODULES[fn].update(mdata)

        self.MODULE_NAMESPACE_DIRECTORIES = [os.path.dirname(x) for x in self.gitrepo.module_files]
        self.MODULE_NAMESPACE_DIRECTORIES = sorted(set(self.MODULE_NAMESPACE_DIRECTORIES))

        # make a list of names by enumerating the files
        self.MODULE_NAMES = [os.path.basename(x) for x in self.gitrepo.module_files]
        self.MODULE_NAMES = [x for x in self.MODULE_NAMES if x.endswith('.py') or x.endswith('.ps1')]
        self.MODULE_NAMES = [x.replace('.ps1', '').replace('.py', '') for x in self.MODULE_NAMES]
        self.MODULE_NAMES = [x for x in self.MODULE_NAMES if not x.startswith('__')]
        self.MODULE_NAMES = sorted(set(self.MODULE_NAMES))

        # make a list of names by calling ansible-doc
        checkoutdir = self.gitrepo.checkoutdir
        checkoutdir = os.path.abspath(checkoutdir)
        cmd = '. {}/hacking/env-setup; ansible-doc -t module -F'.format(checkoutdir)
        logging.debug(cmd)
        (rc, so, se) = run_command(cmd, cwd=checkoutdir)
        if rc:
            raise Exception("'ansible-doc' command failed (%s, %s %s)" % (rc, so, se))
        lines = so.split('\n')
        for line in lines:

            parts = line.split()
            parts = [x.strip() for x in parts]

            if len(parts) != 2 or checkoutdir not in line:
                continue

            mname = parts[0]
            if mname not in self.MODULE_NAMES:
                self.MODULE_NAMES.append(mname)

            fpath = parts[1]
            fpath = fpath.replace(checkoutdir + '/', '')

            if fpath not in self.MODULES:
                self.MODULES[fpath] = {
                    'name': mname,
                    'repo_filename': fpath,
                    'filename': fpath
                }

        _modules = self.MODULES.copy()
        for k, v in _modules.items():
            kparts = os.path.splitext(k)
            if kparts[-1] == '.ps1':
                _k = kparts[0] + '.py'
                checkpath = os.path.join(checkoutdir, _k)
                if not os.path.isfile(checkpath):
                    _k = k
            else:
                _k = k
            ME = ModuleExtractor(os.path.join(checkoutdir, _k), email_cache=self.email_cache)
            if k not in self.BOTMETA['files']:
                self.BOTMETA['files'][k] = {
                    'deprecated': os.path.basename(k).startswith('_'),
                    'labels': os.path.dirname(k).split('/'),
                    'authors': ME.authors,
                    'maintainers': ME.authors,
                    'maintainers_keys': [],
                    'notified': ME.authors,
                    'ignored': [],
                    'support': ME.metadata.get('supported_by', 'community'),
                    'metadata': ME.metadata.copy()
                }
            else:
                bmeta = self.BOTMETA['files'][k].copy()
                bmeta['metadata'] = ME.metadata.copy()
                if 'notified' not in bmeta:
                    bmeta['notified'] = []
                if 'maintainers' not in bmeta:
                    bmeta['maintainers'] = []
                if not bmeta.get('supported_by'):
                    bmeta['supported_by'] = ME.metadata.get('supported_by', 'community')
                if 'authors' not in bmeta:
                    bmeta['authors'] = []
                for x in ME.authors:
                    if x not in bmeta['authors']:
                        bmeta['authors'].append(x)
                    if x not in bmeta['maintainers']:
                        bmeta['maintainers'].append(x)
                    if x not in bmeta['notified']:
                        bmeta['notified'].append(x)
                if not bmeta.get('labels'):
                    bmeta['labels'] = os.path.dirname(k).split('/')
                bmeta['deprecated'] = os.path.basename(k).startswith('_')
                self.BOTMETA['files'][k].update(bmeta)

            # clean out the ignorees
            if 'ignored' in self.BOTMETA['files'][k]:
                for ignoree in self.BOTMETA['files'][k]['ignored']:
                    for thiskey in ['maintainers', 'notified']:
                        while ignoree in self.BOTMETA['files'][k][thiskey]:
                            self.BOTMETA['files'][k][thiskey].remove(ignoree)

            # write back to the modules
            self.MODULES[k].update(self.BOTMETA['files'][k])

    def load_meta(self):
        if self.botmetafile is not None:
            with open(self.botmetafile, 'rb') as f:
                rdata = f.read()
        else:
            fp = '.github/BOTMETA.yml'
            rdata = self.gitrepo.get_file_content(fp)
        self.BOTMETA = BotMetadataParser.parse_yaml(rdata)

    def cache_keywords(self):
        for k, v in self.BOTMETA['files'].items():
            if not v.get('keywords'):
                continue
            for kw in v['keywords']:
                if kw not in self.KEYWORDS:
                    self.KEYWORDS[kw] = k

    def clean_body(self, body, internal=False):
        body = body.lower()
        body = body.strip()
        for SC in self.STOPCHARS:
            if body.startswith(SC):
                body = body.lstrip(SC)
                body = body.strip()
            if body.endswith(SC):
                body = body.rstrip(SC)
                body = body.strip()
            if internal and SC in body:
                body = body.replace(SC, '')
                body = body.strip()
        body = body.strip()
        return body

    def match(self, issuewrapper):
        iw = issuewrapper
        matchdata = self.match_components(
            iw.title,
            iw.body,
            iw.template_data.get('component_raw'),
            files=iw.files
        )
        return matchdata

    def match_components(self, title, body, component, files=None):
        """Make a list of matching files with metadata"""

        self.strategy = None
        self.strategies = []

        # No matching necessary for PRs, but should provide consistent api
        if files:
            matched_filenames = files[:]
        else:
            matched_filenames = []
            if component is None:
                return matched_filenames

            component = component.encode('ascii', 'ignore')
            logging.debug('match "{}"'.format(component))

            delimiters = ['\n', ',', ' + ', ' & ']
            delimited = False
            for delimiter in delimiters:
                if delimiter in component:
                    delimited = True
                    components = component.split(delimiter)
                    for _component in components:
                        _matches = self._match_component(title, body, _component)
                        self.strategies.append(self.strategy)

                        # bypass for blacklist
                        if None in _matches:
                            _matches = []

                        matched_filenames += _matches

                    # do not process any more delimiters
                    break

            if not delimited:
                matched_filenames += self._match_component(title, body, component)
                self.strategies.append(self.strategy)

                # bypass for blacklist
                if None in matched_filenames:
                    return []

            # reduce subpaths
            if matched_filenames:
                matched_filenames = self.reduce_filepaths(matched_filenames)

        # create metadata for each matched file
        component_matches = []
        matched_filenames = sorted(set(matched_filenames))
        for fn in matched_filenames:
            component_matches.append(self.get_meta_for_file(fn))

        return component_matches

    def _match_component(self, title, body, component):
        """Find matches for a single line"""
        matched_filenames = []

        # context sets the path prefix to narrow the search window
        if 'module_util' in title.lower() or 'module_util' in component.lower():
            context = 'lib/ansible/module_utils'
        elif 'module util' in title.lower() or 'module util' in component.lower():
            context = 'lib/ansible/module_utils'
        elif 'module' in title.lower() or 'module' in component.lower():
            context = 'lib/ansible/modules'
        elif 'dynamic inventory' in title.lower() or 'dynamic inventory' in component.lower():
            context = 'contrib/inventory'
        elif 'inventory script' in title.lower() or 'inventory script' in component.lower():
            context = 'contrib/inventory'
        elif 'inventory plugin' in title.lower() or 'inventory plugin' in component.lower():
            context = 'lib/ansible/plugins/inventory'
        else:
            context = None

        if not component:
            return []

        if component not in self.STOPWORDS and component not in self.STOPCHARS:

            if not matched_filenames:
                matched_filenames += self.search_by_keywords(component, exact=True)
                if matched_filenames:
                    self.strategy = 'search_by_keywords'

            if not matched_filenames:
                matched_filenames += self.search_by_module_name(component)
                if matched_filenames:
                    self.strategy = 'search_by_module_name'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_module_globs(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_module_globs'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_modules(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_modules'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_generic(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_generic'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_urls(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_urls'

            if not matched_filenames:
                matched_filenames += self.search_by_tracebacks(component)
                if matched_filenames:
                    self.strategy = 'search_by_tracebacks'

            if not matched_filenames:
                matched_filenames += self.search_by_filepath(component, context=context)
                if matched_filenames:
                    self.strategy = 'search_by_filepath'
                if not matched_filenames:
                    matched_filenames += self.search_by_filepath(component, partial=True)
                    if matched_filenames:
                        self.strategy = 'search_by_filepath[partial]'

            if not matched_filenames:
                matched_filenames += self.search_by_keywords(component, exact=False)
                if matched_filenames:
                    self.strategy = 'search_by_keywords!exact'

            if matched_filenames:
                matched_filenames += self.include_modules_from_test_targets(matched_filenames)

        return matched_filenames

    def search_by_module_name(self, component):
        matches = []

        component = self.clean_body(component)

        # docker-container vs. docker_container
        if component not in self.MODULE_NAMES:
            component = component.replace('-', '_')

        if component in self.MODULE_NAMES:
            mmatch = self.find_module_match(component)
            if mmatch:
                if isinstance(mmatch, list):
                    for x in mmatch:
                        matches.append(x['repo_filename'])
                else:
                    matches.append(mmatch['repo_filename'])

        return matches

    def search_by_keywords(self, component, exact=True):
        """Simple keyword search"""

        component = component.lower()
        matches = []
        if component in self.STOPWORDS:
            matches = [None]
        elif component in self.KEYWORDS:
            matches = [self.KEYWORDS[component]]
        elif not exact:
            for k, v in self.KEYWORDS.items():
                if ' ' + k + ' ' in component or ' ' + k + ' ' in component.lower():
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif ' ' + k + ':' in component or ' ' + k + ':' in component:
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif component.endswith(' ' + k) or component.lower().endswith(' ' + k):
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)

                elif (k in component or k in component.lower()) and k in self.BLACKLIST:
                    logging.debug('blacklist  match: {}'.format(k))
                    matches.append(None)

        return matches

    def search_by_regex_urls(self, body):
        # http://docs.ansible.com/ansible/latest/copy_module.html
        # http://docs.ansible.com/ansible/latest/dev_guide/developing_modules.html
        # http://docs.ansible.com/ansible/latest/postgresql_db_module.html
        # [helm module](https//docs.ansible.com/ansible/2.4/helm_module.html)
        # Windows module: win_robocopy\nhttp://docs.ansible.com/ansible/latest/win_robocopy_module.html
        # Examples:\n* archive (https://docs.ansible.com/ansible/archive_module.html)\n* s3_sync (https://docs.ansible.com/ansible/s3_sync_module.html)
        # https//github.com/ansible/ansible/blob/devel/lib/ansible/modules/windows/win_dsc.ps1L228

        matches = []

        urls = re.findall(
            'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            body
        )
        if urls:
            for url in urls:
                url = url.rstrip(')')
                if '/blob' in url and url.endswith('.py'):
                    parts = url.split('/')
                    bindex = parts.index('blob')
                    fn = '/'.join(parts[bindex+2:])
                    matches.append(fn)
                elif '_module.html' in url:
                    parts = url.split('/')
                    fn = parts[-1].replace('_module.html', '')
                    choices = [x for x in self.gitrepo.files if '/' + fn in x or '/_' + fn in x]
                    choices = [x for x in choices if 'lib/ansible/modules' in x]

                    if len(choices) > 1:
                        choices = [x for x in choices if '/' + fn + '.py' in x or '/' + fn + '.ps1' in x or '/_' + fn + '.py' in x]

                    if not choices:
                        pass
                    elif len(choices) == 1:
                        matches.append(choices[0])
                    else:
                        pass
                else:
                    pass

        return matches

    def search_by_regex_modules(self, body):
        # foo module
        # foo and bar modules
        # foo* modules
        # foo* module

        body = body.lower()
        logging.debug('attempt regex match on: {}'.format(body))

        # https://www.tutorialspoint.com/python/python_reg_expressions.htm
        patterns = [
            r'\:\n(\S+)\.py',
            r'(\S+)\.py',
            r'\-(\s+)(\S+)(\s+)module',
            r'\`ansible_module_(\S+)\.py\`',
            r'module(\s+)\-(\s+)(\S+)',
            r'module(\s+)(\S+)',
            r'\`(\S+)\`(\s+)module',
            r'(\S+)(\s+)module',
            r'the (\S+) command',
            r'(\S+) \(.*\)',
            r'(\S+)\-module',
            r'modules/(\S+)',
            r'module\:(\s+)\`(\S+)\`',
            r'module\: (\S+)',
            r'module (\S+)',
            r'module `(\S+)`',
            r'module: (\S+)',
            r'new (\S+) module',
            r'the (\S+) module',
            r'the \"(\S+)\" module',
            r':\n(\S+) module',
            r'(\S+) module',
            r'(\S+) core module',
            r'(\S+) extras module',
            r':\n\`(\S+)\` module',
            r'\`(\S+)\` module',
            r'`(\S+)` module',
            r'(\S+)\* modules',
            r'(\S+) and (\S+)',
            r'(\S+) or (\S+)',
            r'(\S+) \+ (\S+)',
            r'(\S+) \& (\S)',
            r'(\S+) and (\S+) modules',
            r'(\S+) or (\S+) module',
            r'(\S+)_module',
            r'action: (\S+)',
            r'action (\S+)',
            r'ansible_module_(\S+)\.py',
            r'ansible_module_(\S+)',
            r'ansible_modules_(\S+)\.py',
            r'ansible_modules_(\S+)',
            r'(\S+) task',
            r'(\s+)\((\S+)\)',
            r'(\S+)(\s+)(\S+)(\s+)modules',
            r'(\S+)(\s+)module\:(\s+)(\S+)',
            r'\-(\s+)(\S+)(\s+)module',
            r'\:(\s+)(\S+)(\s+)module',
            r'\-(\s+)ansible(\s+)(\S+)(\s+)(\S+)(\s+)module',
            r'.*(\s+)(\S+)(\s+)module.*'
        ]

        matches = []

        logging.debug('check patterns against: {}'.format(body))

        for pattern in patterns:
            mobj = re.match(pattern, body, re.M | re.I)

            if mobj:
                logging.debug('pattern {} matched on "{}"'.format(pattern, body))

                for x in range(0, mobj.lastindex+1):
                    try:
                        mname = mobj.group(x)
                        logging.debug('mname: {}'.format(mname))
                        if mname == body:
                            continue
                        mname = self.clean_body(mname)
                        if not mname.strip():
                            continue
                        mname = mname.strip().lower()
                        if ' ' in mname:
                            continue
                        if '/' in mname:
                            continue

                        mname = mname.replace('.py', '').replace('.ps1', '')
                        logging.debug('--> {}'.format(mname))

                        # attempt to match a module
                        module_match = self.find_module_match(mname)

                        if not module_match:
                            pass
                        elif isinstance(module_match, list):
                            for m in module_match:
                                matches.append(m['repo_filename'])
                        elif isinstance(module_match, dict):
                            matches.append(module_match['repo_filename'])
                    except Exception as e:
                        logging.error(e)

                if matches:
                    break

        return matches

    def search_by_regex_module_globs(self, body):
        # All AWS modules
        # BigIP modules
        # NXOS modules
        # azurerm modules

        matches = []
        body = self.clean_body(body)
        logging.debug('try globs on: {}'.format(body))

        keymap = {
            'all': None,
            'ec2': 'lib/ansible/modules/cloud/amazon',
            'ec2_*': 'lib/ansible/modules/cloud/amazon',
            'aws': 'lib/ansible/modules/cloud/amazon',
            'amazon': 'lib/ansible/modules/cloud/amazon',
            'google': 'lib/ansible/modules/cloud/google',
            'gce': 'lib/ansible/modules/cloud/google',
            'gcp': 'lib/ansible/modules/cloud/google',
            'bigip': 'lib/ansible/modules/network/f5',
            'nxos': 'lib/ansible/modules/network/nxos',
            'azure': 'lib/ansible/modules/cloud/azure',
            'azurerm': 'lib/ansible/modules/cloud/azure',
            'openstack': 'lib/ansible/modules/cloud/openstack',
            'ios': 'lib/ansible/modules/network/ios',
        }

        regexes = [
            r'(\S+) ansible modules',
            r'all (\S+) based modules',
            r'all (\S+) modules',
            r'.* all (\S+) modules.*',
            r'(\S+) modules',
            r'(\S+\*) modules',
            r'all cisco (\S+\*) modules',
        ]

        mobj = None
        for x in regexes:
            mobj = re.match(x, body)
            if mobj:
                logging.debug('matched glob: {}'.format(x))
                break

        if not mobj:
            logging.debug('no glob matches')

        if mobj:
            keyword = mobj.group(1)
            if not keyword.strip():
                pass
            elif keyword in keymap:
                if keymap[keyword]:
                    matches.append(keymap[keyword])
            else:

                if '*' in keyword:
                    keyword = keyword.replace('*', '')

                # check for directories first
                fns = [x for x in self.MODULE_NAMESPACE_DIRECTORIES if keyword in x]

                # check for files second
                if not fns:
                    fns = [x for x in self.gitrepo.module_files if 'lib/ansible/modules' in x and keyword in x]

                if fns:
                    matches += fns

        if matches:
            matches = sorted(set(matches))

        return matches

    def search_by_regex_generic(self, body):
        # foo dynamic inventory script
        # foo filter

        # https://www.tutorialspoint.com/python/python_reg_expressions.htm
        patterns = [
            [r'(.*) action plugin', 'lib/ansible/plugins/action'],
            [r'(.*) inventory plugin', 'lib/ansible/plugins/inventory'],
            [r'(.*) dynamic inventory', 'contrib/inventory'],
            [r'(.*) dynamic inventory (script|file)', 'contrib/inventory'],
            [r'(.*) inventory script', 'contrib/inventory'],
            [r'(.*) filter', 'lib/ansible/plugins/filter'],
            [r'(.*) jinja filter', 'lib/ansible/plugins/filter'],
            [r'(.*) jinja2 filter', 'lib/ansible/plugins/filter'],
            [r'(.*) template filter', 'lib/ansible/plugins/filter'],
            [r'(.*) fact caching plugin', 'lib/ansible/plugins/cache'],
            [r'(.*) fact caching module', 'lib/ansible/plugins/cache'],
            [r'(.*) lookup plugin', 'lib/ansible/plugins/lookup'],
            [r'(.*) lookup', 'lib/ansible/plugins/lookup'],
            [r'(.*) callback plugin', 'lib/ansible/plugins/callback'],
            [r'(.*)\.py callback', 'lib/ansible/plugins/callback'],
            [r'callback plugin (.*)', 'lib/ansible/plugins/callback'],
            [r'(.*) stdout callback', 'lib/ansible/plugins/callback'],
            [r'stdout callback (.*)', 'lib/ansible/plugins/callback'],
            [r'stdout_callback (.*)', 'lib/ansible/plugins/callback'],
            [r'(.*) callback plugin', 'lib/ansible/plugins/callback'],
            [r'(.*) connection plugin', 'lib/ansible/plugins/connection'],
            [r'(.*) connection type', 'lib/ansible/plugins/connection'],
            [r'(.*) connection', 'lib/ansible/plugins/connection'],
            [r'(.*) transport', 'lib/ansible/plugins/connection'],
            [r'connection=(.*)', 'lib/ansible/plugins/connection'],
            [r'connection: (.*)', 'lib/ansible/plugins/connection'],
            [r'connection (.*)', 'lib/ansible/plugins/connection'],
            [r'strategy (.*)', 'lib/ansible/plugins/strategy'],
            [r'(.*) strategy plugin', 'lib/ansible/plugins/strategy'],
            [r'(.*) module util', 'lib/ansible/module_utils'],
            [r'ansible-galaxy (.*)', 'lib/ansible/galaxy'],
            [r'ansible-playbook (.*)', 'lib/ansible/playbook'],
            [r'ansible/module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'lib/ansible/module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'(\S+) documentation fragment', 'lib/ansible/utils/module_docs_fragments'],
        ]

        body = self.clean_body(body)

        matches = []

        for pattern in patterns:
            mobj = re.match(pattern[0], body, re.M | re.I)

            if mobj:
                logging.debug('pattern hit: {}'.format(pattern))
                fname = mobj.group(1)
                fname = fname.lower()

                fpath = os.path.join(pattern[1], fname)

                if fpath in self.gitrepo.files:
                    matches.append(fpath)
                elif os.path.join(pattern[1], fname + '.py') in self.gitrepo.files:
                    fname = os.path.join(pattern[1], fname + '.py')
                    matches.append(fname)
                else:
                    # fallback to the directory
                    matches.append(pattern[1])

        return matches

    def search_by_tracebacks(self, body):

        matches = []

        if 'Traceback (most recent call last)' in body:
            lines = body.split('\n')
            for line in lines:
                line = line.strip()
                if line.startswith('DistributionNotFound'):
                    matches = ['setup.py']
                    break
                elif line.startswith('File'):
                    fn = line.split()[1]
                    for SC in self.STOPCHARS:
                        fn = fn.replace(SC, '')
                    if 'ansible_module_' in fn:
                        fn = os.path.basename(fn)
                        fn = fn.replace('ansible_module_', '')
                        matches = [fn]
                    elif 'cli/playbook.py' in fn:
                        fn = 'lib/ansible/cli/playbook.py'
                    elif 'module_utils' in fn:
                        idx = fn.find('module_utils/')
                        fn = 'lib/ansible/' + fn[idx:]
                    elif 'ansible/' in fn:
                        idx = fn.find('ansible/')
                        fn1 = fn[idx:]

                        if 'bin/' in fn1:
                            if not fn1.startswith('bin'):

                                idx = fn1.find('bin/')
                                fn1 = fn1[idx:]

                                if fn1.endswith('.py'):
                                    fn1 = fn1.rstrip('.py')

                        elif 'cli/' in fn1:
                            idx = fn1.find('cli/')
                            fn1 = fn1[idx:]
                            fn1 = 'lib/ansible/' + fn1

                        elif 'lib' not in fn1:
                            fn1 = 'lib/' + fn1

                        if fn1 not in self.files:
                            pass

        return matches

    def search_by_filepath(self, body, partial=False, context=None):
        """Find known filepaths in body"""

        matches = []
        body = self.clean_body(body)

        if not body:
            return []
        if body.lower() in self.STOPCHARS:
            return []
        if body.lower() in self.STOPWORDS:
            return []

        # 'inventory manager' vs. 'inventory/manager'
        if partial and ' ' in body:
            body = body.replace(' ', '/')

        if 'site-packages' in body:
            res = re.match('(.*)/site-packages/(.*)', body)
            body = res.group(2)
        if 'modules/core/' in body:
            body = body.replace('modules/core/', 'modules/')
        if 'modules/extras/' in body:
            body = body.replace('modules/extras/', 'modules/')
        if 'ansible-modules-core/' in body:
            body = body.replace('ansible-modules-core/', '/')
        if 'ansible-modules-extras/' in body:
            body = body.replace('ansible-modules-extras/', '/')
        if body.startswith('ansible/lib/ansible'):
            body = body.replace('ansible/lib', 'lib')
        if body.startswith('ansible/') and not body.startswith('ansible/modules'):
            body = body.replace('ansible/', '', 1)
        if 'module/' in body:
            body = body.replace('module/', 'modules/')

        logging.debug('search filepath [{}] [{}]: {}'.format(context, partial, body))

        if len(body) < 2:
            return []

        if '/' in body:
            body_paths = body.split('/')
        elif ' ' in body:
            body_paths = body.split()
            body_paths = [x.strip() for x in body_paths if x.strip()]
        else:
            body_paths = [body]

        if 'networking' in body_paths:
            ix = body_paths.index('networking')
            body_paths[ix] = 'network'
        if 'plugin' in body_paths:
            ix = body_paths.index('plugin')
            body_paths[ix] = 'plugins'

        if not context or 'lib/ansible/modules' in context:
            mmatch = self.find_module_match(body)
            if mmatch:
                if isinstance(mmatch, list) and len(mmatch) > 1:

                    # only allow for exact prefix globbing here ...
                    if [x for x in mmatch if x['repo_filename'].startswith(body)]:
                        return [x['repo_filename'] for x in mmatch]

                elif isinstance(mmatch, list):
                    return [x['repo_filename'] for x in mmatch]
                else:
                    return [mmatch['repo_filename']]

        if body in self.gitrepo.files:
            matches = [body]
        else:
            for fn in self.gitrepo.files:

                # limit the search set if a context is given
                if context is not None and not fn.startswith(context):
                    continue

                if fn.endswith(body) or fn.endswith(body + '.py') or fn.endswith(body + '.ps1'):
                    # ios_config.py -> test_ios_config.py vs. ios_config.py
                    bn1 = os.path.basename(body)
                    bn2 = os.path.basename(fn)
                    if bn2.startswith(bn1):
                        matches = [fn]
                        break

                if partial:

                    # netapp_e_storagepool storage module
                    # lib/ansible/modules/storage/netapp/netapp_e_storagepool.py

                    # if all subpaths are in this filepath, it is a match
                    bp_total = 0
                    fn_paths = fn.split('/')
                    fn_paths.append(fn_paths[-1].replace('.py', '').replace('.ps1', ''))

                    for bp in body_paths:
                        if bp in fn_paths:
                            bp_total += 1

                    if bp_total == len(body_paths):
                        matches = [fn]
                        break

                    elif bp_total > 1:

                        if (float(bp_total) / float(len(body_paths))) >= (2.0 / 3.0):
                            if fn not in matches:
                                matches.append(fn)

        if matches:
            tr = []
            for match in matches[:]:
                # reduce to longest path
                for m in matches:
                    if match == m:
                        continue
                    if len(m) < match and match.startswith(m):
                        tr.append(m)

            for r in tr:
                if r in matches:
                    logging.debug('trimming {}'.format(r))
                    matches.remove(r)

        matches = sorted(set(matches))
        logging.debug('return: {}'.format(matches))

        return matches

    def reduce_filepaths(self, matches):

        # unique
        _matches = []
        for _match in matches:
            if _match not in _matches:
                _matches.append(_match)
        matches = _matches[:]

        # squash to longest path
        if matches:
            tr = []
            for match in matches[:]:
                # reduce to longest path
                for m in matches:
                    if match == m:
                        continue
                    if m is None or match is None:
                        continue
                    if len(m) < match and match.startswith(m) or match.endswith(m):
                        tr.append(m)

            for r in tr:
                if r in matches:
                    matches.remove(r)
        return matches

    def include_modules_from_test_targets(self, matches):
        """Map test targets to the module files"""
        new_matches = []
        for match in matches:
            if not match:
                continue
            # include modules from test targets
            if 'test/integration/targets' in match:
                paths = match.split('/')
                tindex = paths.index('targets')
                mname = paths[tindex+1]
                mrs = self.find_module_match(mname, exact=True)
                if mrs:
                    if not isinstance(mrs, list):
                        mrs = [mrs]
                    for mr in mrs:
                        new_matches.append(mr['repo_filename'])
        return new_matches

    def get_meta_for_file(self, filename):
        meta = {
            'repo_filename': filename,
            'name': os.path.basename(filename).split('.')[0],
            'notify': [],
            'assign': [],
            'authors': [],
            'committers': [],
            'maintainers': [],
            'labels': [],
            'ignore': [],
            'support': None,
            'supported_by': None,
            'deprecated': False,
            'topic': None,
            'subtopic': None,
            'namespace': None,
            'namespace_maintainers': []
        }

        populated = False
        filenames = [filename, os.path.splitext(filename)[0]]

        # powershell meta is in the python file
        if filename.endswith('.ps1'):
            pyfile = filename.replace('.ps1', '.py')
            if pyfile in self.BOTMETA['files']:
                filenames.append(pyfile)

        botmeta_entries = self.file_indexer._filenames_to_keys(filenames)

        for entry in botmeta_entries:
            fdata = self.BOTMETA['files'][entry].copy()

            if 'authors' in fdata:
                meta['authors'] = fdata['authors']
            if 'maintainers' in fdata:
                meta['notify'] += fdata['maintainers']
                meta['assign'] += fdata['maintainers']
                meta['maintainers'] += fdata['maintainers']
            if 'notified' in fdata:
                meta['notify'] += fdata['notified']
            if 'labels' in fdata:
                meta['labels'] += fdata['labels']
            if 'ignore' in fdata:
                meta['ignore'] += fdata['ignore']
            if 'ignored' in fdata:
                meta['ignore'] += fdata['ignored']
            if 'support' in fdata:
                if isinstance(fdata['support'], list):
                    meta['support'] = fdata['support'][0]
                else:
                    meta['support'] = fdata['support']
            elif 'supported_by' in fdata:
                if isinstance(fdata['supported_by'], list):
                    meta['support'] = fdata['supported_by'][0]
                else:
                    meta['support'] = fdata['supported_by']

            if 'deprecated' in fdata:
                meta['deprecated'] = fdata['deprecated']

            populated = True

        # walk up the tree for more meta
        paths = filename.split('/')
        for idx, x in enumerate(paths):
            thispath = '/'.join(paths[:(0-idx)])
            if thispath in self.BOTMETA['files']:
                fdata = self.BOTMETA['files'][thispath].copy()
                if 'support' in fdata and not meta['support']:
                    if isinstance(fdata['support'], list):
                        meta['support'] = fdata['support'][0]
                    else:
                        meta['support'] = fdata['support']
                if 'labels' in fdata:
                    meta['labels'] += fdata['labels']
                if 'maintainers' in fdata:
                    meta['notify'] += fdata['maintainers']
                    meta['assign'] += fdata['maintainers']
                    meta['maintainers'] += fdata['maintainers']
                if 'ignore' in fdata:
                    meta['ignore'] += fdata['ignore']
                if 'notified' in fdata:
                    meta['notify'] += fdata['notified']

        if 'lib/ansible/modules' in filename:
            topics = [x for x in paths if x not in ['lib', 'ansible', 'modules']]
            topics = [x for x in topics if x != os.path.basename(filename)]
            if len(topics) == 2:
                meta['topic'] = topics[0]
                meta['subtopic'] = topics[1]
            elif len(topics) == 1:
                meta['topic'] = topics[0]

            meta['namespace'] = '/'.join(topics)

        # set namespace maintainers (skip !modules for now)
        if filename.startswith('lib/ansible/modules'):
            ns = meta.get('namespace')
            keys = self.BOTMETA['files'].keys()
            keys = [x for x in keys if x.startswith(os.path.join('lib/ansible/modules', ns))]
            ignored = []

            for key in keys:
                meta['namespace_maintainers'] += self.BOTMETA['files'][key].get('maintainers', [])
                ignored += self.BOTMETA['files'][key].get('ignored', [])

            for ignoree in ignored:
                while ignoree in meta['namespace_maintainers']:
                    meta['namespace_maintainers'].remove(ignoree)

        # new modules should default to "community" support
        if filename.startswith('lib/ansible/modules') and filename not in self.gitrepo.files:
            meta['support'] = 'community'
            meta['supported_by'] = 'community'

        # test targets for modules should inherit from their modules
        if filename.startswith('test/integration/targets') and filename not in self.BOTMETA['files']:
            whitelist = [
                'labels',
                'ignore',
                'deprecated',
                'authors',
                'assign',
                'maintainers',
                'notify',
                'topic',
                'subtopic',
                'support'
            ]

            paths = filename.split('/')
            tindex = paths.index('targets')
            mname = paths[tindex+1]
            mmatch = self._find_module_match(mname, exact=True)
            if mmatch:
                mmeta = self.get_meta_for_file(mmatch[0]['repo_filename'])
                for k, v in mmeta.items():
                    if k in whitelist and v:
                        if isinstance(meta[k], list):
                            meta[k] = sorted(set(meta[k] + v))
                        elif not meta[k]:
                            meta[k] = v

            # make new test targets community by default
            if not meta['support'] and not meta['supported_by']:
                meta['support'] = 'community'

        # it's okay to remove things from legacy-files.txt
        if filename == 'test/sanity/pep8/legacy-files.txt' and not meta['support']:
            meta['support'] = 'community'

        # fallback to core support
        if not meta['support']:
            meta['support'] = 'core'

        # align support and supported_by
        if meta['support'] != meta['supported_by']:
            if meta['support'] and not meta['supported_by']:
                meta['supported_by'] = meta['support']
            elif not meta['support'] and meta['supported_by']:
                meta['support'] = meta['supported_by']

        # clean up the result
        _meta = meta.copy()
        for k, v in _meta.items():
            if isinstance(v, list):
                meta[k] = sorted(set(v))

        # walk up the botmeta tree looking for ignores to include
        if meta.get('repo_filename'):
            namespace_paths = os.path.dirname(meta['repo_filename'])
            namespace_paths = namespace_paths.split('/')
            for x in reversed(range(0, len(namespace_paths) + 1)):
                this_ns_path = '/'.join(namespace_paths[:x])
                if not this_ns_path:
                    continue
                print('check {}'.format(this_ns_path))
                if this_ns_path in self.BOTMETA['files']:
                    this_ignore = self.BOTMETA['files'][this_ns_path].get('ignore') or \
                        self.BOTMETA['files'][this_ns_path].get('ignored') or \
                        self.BOTMETA['files'][this_ns_path].get('ignores')
                    print('ignored: {}'.format(this_ignore))
                    if this_ignore:
                        for username in this_ignore:
                            if username not in meta['ignore']:
                                meta['ignore'].append(username)

        # process ignores AGAIN.
        if meta.get('ignore'):
            for k, v in meta.items():
                if k == 'ignore':
                    continue
                if not isinstance(v, list):
                    continue
                for ignoree in meta['ignore']:
                    if ignoree in v:
                        meta[k].remove(ignoree)

        return meta

    def find_module_match(self, pattern, exact=False):
        '''Exact module name matching'''

        logging.debug('find_module_match for "{}"'.format(pattern))
        candidate = None

        BLACKLIST = [
            'module_utils',
            'callback',
            'network modules',
            'networking modules'
            'windows modules'
        ]

        if not pattern or pattern is None:
            return None

        # https://github.com/ansible/ansible/issues/19755
        if pattern == 'setup':
            pattern = 'lib/ansible/modules/system/setup.py'

        if '/facts.py' in pattern or ' facts.py' in pattern:
            pattern = 'lib/ansible/modules/system/setup.py'

        # https://github.com/ansible/ansible/issues/18527
        #   docker-container -> docker_container
        if '-' in pattern:
            pattern = pattern.replace('-', '_')

        if 'module_utils' in pattern:
            # https://github.com/ansible/ansible/issues/20368
            return None
        elif 'callback' in pattern:
            return None
        elif 'lookup' in pattern:
            return None
        elif 'contrib' in pattern and 'inventory' in pattern:
            return None
        elif pattern.lower() in BLACKLIST:
            return None

        candidate = self._find_module_match(pattern, exact=exact)

        if not candidate:
            candidate = self._find_module_match(os.path.basename(pattern))

        if not candidate and '/' in pattern and not pattern.startswith('lib/'):
            ppy = None
            ps1 = None
            if not pattern.endswith('.py') and not pattern.endswith('.ps1'):
                ppy = pattern + '.py'
            if not pattern.endswith('.py') and not pattern.endswith('.ps1'):
                ps1 = pattern + '.ps1'
            for mf in self.gitrepo.module_files:
                if pattern in mf:
                    if mf.endswith(pattern) or mf.endswith(ppy) or mf.endswith(ps1):
                        candidate = mf
                        break

        return candidate

    def _find_module_match(self, pattern, exact=False):

        logging.debug('matching on {}'.format(pattern))

        matches = []

        if isinstance(pattern, unicode):
            pattern = pattern.encode('ascii', 'ignore')

        logging.debug('_find_module_match: {}'.format(pattern))

        noext = pattern.replace('.py', '').replace('.ps1', '')

        # exact is looking for a very precise name such as "vmware_guest"
        if exact:
            candidates = [pattern]
        else:
            candidates = [pattern, '_' + pattern, noext, '_' + noext]

        for k, v in self.MODULES.items():
            if v['name'] in candidates:
                logging.debug('match {} on name: {}'.format(k, v['name']))
                matches = [v]
                break

        if not matches:
            # search by key ... aka the filepath
            for k, v in self.MODULES.items():
                if k == pattern:
                    logging.debug('match {} on key: {}'.format(k, k))
                    matches = [v]
                    break

        # spellcheck
        if not exact and not matches and '/' not in pattern:
            _pattern = pattern
            if not isinstance(_pattern, unicode):
                _pattern = _pattern.decode('utf-8')
            candidates = []
            for k, v in self.MODULES.items():
                vname = v['name']
                if not isinstance(vname, unicode):
                    vname = vname.decode('utf-8')
                jw = jaro_winkler(vname, _pattern)
                if jw > .9:
                    candidates.append((jw, k))
            for candidate in candidates:
                matches.append(self.MODULES[candidate[1]])

        return matches
コード例 #3
0
ファイル: checkmatches.py プロジェクト: swipswaps/ansibullbot
def main():

    tocheck = [
        #32226,
        #30361,
        #31006,
        #58674,
        #63611,
        #64320,
        #66891,
        #68784,
        69010,
    ]

    redirect = set()
    noredirect = set()
    nometa = set()

    cachedir = '/home/jtanner/.ansibullbot/cache'
    gitrepo = GitRepoWrapper(cachedir=cachedir,
                             repo='https://github.com/ansible/ansible',
                             commit=None,
                             rebase=False)
    rdata = gitrepo.get_file_content(u'.github/BOTMETA.yml')
    botmeta = BotMetadataParser.parse_yaml(rdata)
    cm = AnsibleComponentMatcher(cachedir=cachedir,
                                 gitrepo=gitrepo,
                                 botmeta=botmeta,
                                 botmetafile=None,
                                 email_cache=None,
                                 usecache=True,
                                 use_galaxy=True)
    '''
    mr = parse_match_results()
    for issue in sorted(mr.keys(), key=lambda x: int(x.split('/')[-1]), reverse=True):

        print(issue)
        number = int(issue.split('/')[-1])
        #if number != 68709:
        #    continue
        print(number)
        mfile = os.path.join('~/.ansibullbot/cache/ansible/ansible/issues/%s' % number, 'meta.json')
        mfile = os.path.expanduser(mfile)
        if os.path.exists(mfile):
            with open(mfile, 'r') as f:
                imeta = json.loads(f.read())
        else:
            nometa.add(issue)
            imeta = {}

        if imeta:

            iw = MockIssueWrapper(issue, meta=imeta)
            cfacts = get_collection_facts(iw, cm, imeta)
            #pprint(cfacts)

            if cfacts.get('needs_collection_redirect') == True:
                redirect.add(issue)
            else:
                noredirect.add(issue)

            #if not imeta['is_backport']:
            #    import epdb; epdb.st()
    '''

    mmap = {}

    #gmatches = cm.search_ecosystem('contrib/inventory/ec2.py')
    #import epdb; epdb.st()

    mfiles = get_issues()
    for mfile in mfiles:
        with open(mfile, 'r') as f:
            imeta = json.loads(f.read())
        print(imeta['html_url'])
        number = int(imeta['html_url'].split('/')[-1])
        if number not in tocheck:
            continue

        newmeta = copy.deepcopy(imeta)
        iw = MockIssueWrapper(imeta['html_url'], meta=newmeta, gitrepo=gitrepo)
        #cmatches = cm.match_components(iw.title, iw.body, iw.component)
        cmmeta = get_component_match_facts(iw, cm, [])
        newmeta.update(cmmeta)
        cfmeta = get_collection_facts(iw, cm, newmeta)

        # check api deltas ...
        #cm1 = cm.match(iw)
        #cm2 = cm.match_components(iw.title, iw.body, iw.component, files=iw.files)
        #import epdb; epdb.st()

        print('component: %s' % iw.component)
        print(cmmeta['component_filenames'])
        #pprint(cfmeta)
        cf2vals = [x for x in list(cfmeta['collection_filemap'].values()) if x]
        cf1vals = [x for x in list(imeta['collection_filemap'].values()) if x]
        '''
        if cf1vals or cf2vals:
            pprint(cf1vals)
            pprint(cf2vals)
            #import epdb; epdb.st()
        '''
        '''
        if cf2vals != cf1vals:
            pprint(cf1vals)
            pprint(cf2vals)
            import epdb; epdb.st()
        '''
        pprint(cfmeta)
        import epdb
        epdb.st()

    print('# %s total issues|PRs without meta' % len(list(nometa)))
    print('# %s total issues|PRs not redirected to collections' %
          len(list(noredirect)))
    print('# %s total issues|PRs redirected to collections' %
          len(list(redirect)))

    import epdb
    epdb.st()