@classmethod
def setUpClass(cls):
    """Init the matcher"""
    cachedir = tempfile.mkdtemp()
    gitrepo = GitShallowRepo(cachedir=cachedir, repo=ComponentMatcher.REPO)
    gitrepo.update()

    file_indexer = FileIndexer(gitrepo=gitrepo)
    file_indexer.get_files()
    file_indexer.parse_metadata()

    cls.component_matcher = ComponentMatcher(
        email_cache={},
        gitrepo=gitrepo,
        file_indexer=file_indexer
    )
def get_file_indexer(m_manage_checkout, m_checkoutdir):
    indexer = FileIndexer()
    indexer.get_files()
    indexer.parse_metadata()
    return indexer
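# A hypothetical test wiring for the fixture above, inferred from the m_*
# parameter names: patch out checkout management so FileIndexer() never
# touches a real git clone. The patched attribute names are assumptions;
# mock.patch injects mocks bottom-up, so the innermost decorator's mock
# arrives as the first argument.
import mock  # unittest.mock on py3


@mock.patch.object(FileIndexer, 'manage_checkout', create=True)
@mock.patch.object(FileIndexer, 'checkoutdir', create=True)
def test_file_indexer_metadata(m_checkoutdir, m_manage_checkout):
    indexer = get_file_indexer(m_manage_checkout, m_checkoutdir)
    assert indexer is not None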
import json
import os
import sys
from pprint import pprint

# set_logger, extract_metafiles, load_skip/load_expected/load_match_map and
# their save_* counterparts, IssueWrapperMock, get_component_match_facts,
# clean_metafiles, CACHEDIR, FIXTUREDIR, LABELS, C (constants), FileIndexer,
# ModuleIndexer, GithubGraphQLClient and AnsibleComponentMatcher are assumed
# to be defined or imported elsewhere in this script's module.


def main():
    set_logger()

    METAFILES = extract_metafiles()
    SKIP = load_skip()
    EXPECTED = load_expected()
    MATCH_MAP = load_match_map()

    ERRORS = []
    ERRORS_COMPONENTS = []

    start_at = None
    if len(sys.argv) == 2:
        start_at = int(sys.argv[1])

    FI = FileIndexer(checkoutdir=CACHEDIR)
    with open('/tmp/files.json', 'wb') as f:
        f.write(json.dumps(FI.files, indent=2))

    GQLC = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
    MI = ModuleIndexer(cachedir=CACHEDIR, gh_client=GQLC, blames=False, commits=False)
    CM = AnsibleComponentMatcher(cachedir=CACHEDIR)

    for k, v in MI.modules.items():
        if k in MATCH_MAP:
            MATCH_MAP.pop(k, None)
        kname = v.get('name')
        if kname not in MATCH_MAP:
            MATCH_MAP[kname] = v.get('repo_filename')
        if kname + ' module' not in MATCH_MAP:
            MATCH_MAP[kname + ' module'] = v.get('repo_filename')
        if 'module: ' + kname not in MATCH_MAP:
            MATCH_MAP['module: ' + kname] = v.get('repo_filename')
        if 'module ' + kname not in MATCH_MAP:
            MATCH_MAP['module ' + kname] = v.get('repo_filename')

        # /modules/remote_management/foreman/katello.py
        pname = k.replace('lib/ansible', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # /ansible/modules/packaging/os/rpm_key.py
        pname = k.replace('lib/', '/')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # ansible/modules/packaging/os/rpm_key.py
        pname = k.replace('lib/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # ansible/lib/ansible/modules/monitoring/monit.py
        pname = 'ansible/' + k
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # network/f5/bigip_gtm_wide_ip
        pname = k.replace('lib/ansible/modules/', '')
        pname = pname.replace('.py', '')
        pname = pname.replace('.ps1', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # network/f5/bigip_gtm_wide_ip.py
        pname = k.replace('lib/ansible/modules/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # modules/packaging/os/pkgng.py
        pname = k.replace('lib/ansible/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

    save_match_map(MATCH_MAP)

    total = len(METAFILES)
    for IDMF, MF in enumerate(METAFILES):
        if start_at and IDMF < start_at:
            continue

        with open(MF, 'rb') as f:
            meta = json.loads(f.read())

        if not meta.get('is_issue'):
            continue

        component = meta.get('template_data', {}).get('component_raw')
        #if component != 'Module `synchronize`':
        #if component != 'Module: include_role':
        #    continue

        if component:
            print('------------------------------------------ {}|{}'.format(total, IDMF))
            print(meta['html_url'])
            print(meta['title'])
            print(component)

            hurl = meta['html_url']
            if hurl in SKIP:
                continue

            # bad template or bad template parsing
            if len(component) > 100:
                continue

            iw = IssueWrapperMock(meta)
            if 'module' not in iw.body.lower() and 'module' not in iw.title.lower():
                continue

            expected_fns = []

            # OLD METHOD
            if hurl not in EXPECTED and component not in MATCH_MAP:
                cmf = get_component_match_facts(iw, meta, FI, MI, LABELS)
                expected_fns = cmf.get('module_match')
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
                expected_fns = [x['repo_filename'] for x in expected_fns if x]
                if 'component_matches' in cmf:
                    expected_fns = [x['filename'] for x in cmf['component_matches']]
                expected_fns = sorted(set(expected_fns))

            # NEW METHOD
            cmr = CM.match_components(iw.title, iw.body, iw.template_data.get('component_raw'))
            cmr_fns = [x['repo_filename'] for x in cmr if x]
            cmr_fns = sorted(set(cmr_fns))

            # VALIDATE FROM EXPECTED IF KNOWN
            if hurl in EXPECTED:
                if EXPECTED[hurl] and not isinstance(EXPECTED[hurl], list):
                    expected_fns = [EXPECTED[hurl]]
                elif EXPECTED[hurl]:
                    expected_fns = EXPECTED[hurl]
                else:
                    expected_fns = []

            # USE THE CACHED MAP
            if component in MATCH_MAP:
                expected_fns = MATCH_MAP[component]
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
            elif component.lower() in MATCH_MAP:
                expected_fns = MATCH_MAP[component.lower()]
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
            elif component.startswith(':\n') and component.endswith(' module'):
                mapkey = component.lstrip(':\n')
                if mapkey in MATCH_MAP:
                    expected_fns = MATCH_MAP[mapkey]
                    if not isinstance(expected_fns, list):
                        expected_fns = [expected_fns]

            # OLD CODE USED ACTION PLUGINS INSTEAD OF MODULES
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if len(expected_fns) == 1 and len(cmr_fns) == 1 and 'plugins/action' in expected_fns[0]:
                    e_bn = os.path.basename(expected_fns[0])
                    c_bn = os.path.basename(cmr_fns[0])
                    if e_bn == c_bn:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue

            # DOCS URLS
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if len(cmr_fns) == 1 and 'lib/ansible/modules' in cmr_fns[0]:
                    c_bn = os.path.basename(cmr_fns[0])
                    if 'docs.ansible.com/ansible/latest/{}_module.html'.format(c_bn) in component:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue
                elif CM.strategy in ['search_by_regex_urls']:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # NXOS ISSUES HAVE NXOS_VERSION HEADER
            if '- nxos' in component:
                if len(cmr_fns) == 1:
                    if os.path.basename(cmr_fns[0]).replace('.py', '') in component:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue
                #import epdb; epdb.st()

            # ODDBALL MODULE COMPONENTS
            if len(cmr_fns) == 1 and 'lib/ansible/modules' in cmr_fns[0]:
                bn = os.path.basename(cmr_fns[0])
                bn = bn.replace('.py', '')
                bn = bn.replace('.ps1', '')
                if (bn in component or bn.lstrip('_') in component) and 'module' in component.lower():
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == '- ' + bn:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == bn + '.py' or component == bn + '.ps1':
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == '_' + bn + '.py' or component == '_' + bn + '.ps1':
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == ':\n' + bn or component == ':\n' + bn.lstrip('_'):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # 'multiple modules', etc ...
            if component in CM.KEYWORDS or component.lower() in CM.KEYWORDS:
                if component in CM.KEYWORDS and CM.KEYWORDS[component] is None and not cmr_fns:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component.lower() in CM.KEYWORDS and CM.KEYWORDS[component.lower()] is None and not cmr_fns:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif len(cmr_fns) == 1 and cmr_fns[0] == CM.KEYWORDS.get(component):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif len(cmr_fns) == 1 and cmr_fns[0] == CM.KEYWORDS.get(component.lower()):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.lstrip('-').strip() in CM.KEYWORDS and len(cmr_fns) == 1:
                cname = component.lstrip('-').strip()
                if CM.KEYWORDS[cname] == cmr_fns[0]:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.endswith(' lookup') and len(cmr_fns) == 1 and 'lib/ansible/plugins/lookup' in cmr_fns[0]:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if component.endswith(' inventory script') and len(cmr_fns) == 1 and 'contrib/inventory' in cmr_fns[0]:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if component.startswith('ansible/lib') and len(cmr_fns) == 1:
                fn = cmr_fns[0]
                if 'ansible/' + fn == component:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.endswith(' inventory plugin') and len(cmr_fns) == 1:
                fn = cmr_fns[0]
                if fn.startswith('lib/ansible/plugins/inventory'):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component == 'ec2.py' and cmr_fns and 'contrib/inventory/ec2.py' in cmr_fns:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if len(expected_fns) == 1 and len(cmr_fns) == 1:
                if os.path.basename(expected_fns[0]) == os.path.basename(cmr_fns[0]):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # COMPARE AND RECORD
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if component in MATCH_MAP or component.lower() in MATCH_MAP:
                    if component.lower() in MATCH_MAP:
                        mmc = MATCH_MAP[component.lower()]
                    else:
                        mmc = MATCH_MAP[component]
                    if not isinstance(mmc, list):
                        mmc = [mmc]
                    if mmc == cmr_fns:
                        EXPECTED[iw.html_url] = cmr_fns
                        save_expected(EXPECTED)
                        continue

                print('## COMPONENT ...')
                print(component)
                print('## EXPECTED ...')
                pprint(expected_fns)
                print('## RESULT ...')
                pprint(cmr_fns)
                print('## STRATEGIES ..')
                pprint(CM.strategy)
                pprint(CM.strategies)
                print('--------------------------------')
                res = raw_input('Is the result correct? (y/n/s/d): ')
                if res.lower() in ['y', 'yes']:
                    MATCH_MAP[component] = cmr_fns
                    EXPECTED[iw.html_url] = cmr_fns
                    save_expected(EXPECTED)
                    continue
                elif res.lower() in ['s', 'skip']:
                    SKIP.append(hurl)
                    save_skip(SKIP)
                    continue
                elif res.lower() in ['d', 'debug']:
                    import epdb; epdb.st()

                ERRORS.append(iw.html_url)
                ERRORS_COMPONENTS.append({
                    'url': iw.html_url,
                    'component': component,
                    'component_raw': iw.template_data.get('component_raw'),
                    'result': cmr_fns,
                    'expected': expected_fns,
                    'strategy': CM.strategy,
                    'strategies': CM.strategies
                })
            else:
                if component not in MATCH_MAP:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                if hurl not in EXPECTED:
                    EXPECTED[hurl] = cmr_fns
                    save_expected(EXPECTED)
                continue

    pprint(ERRORS)
    fn = os.path.join(FIXTUREDIR, 'component_errors.json')
    with open(fn, 'wb') as f:
        f.write(json.dumps(ERRORS_COMPONENTS, indent=2, sort_keys=True))

    clean_metafiles(METAFILES)
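# Assumed entrypoint guard (not shown in the original snippet). The optional
# argv[1] is the metafile index to resume from, per "start_at" above; the
# script name here is hypothetical.
#
#   $ python match_map_tool.py          # full run
#   $ python match_map_tool.py 500      # resume at metafile 500
#
if __name__ == '__main__':
    main()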
import logging
import os
from pprint import pprint

# DefaultTriager, DefaultActions, FileIndexer and IssueWrapper are assumed
# imports from the surrounding project; UnknownObjectException comes from
# PyGithub.


class SimpleTriager(DefaultTriager):

    def __init__(self):
        super(SimpleTriager, self).__init__()

        # get valid labels
        logging.info('getting labels')
        self.valid_labels = self.get_valid_labels(self.repo)

    @classmethod
    def create_parser(cls):
        parser = DefaultTriager.create_parser()
        parser.description = "Triage issue and pullrequest queues for any github repo.\n" \
                             " (NOTE: only useful if you have commit access to" \
                             " the repo in question.)"
        parser.add_argument("--pr", "--id", type=str, dest="number",
                            help="Triage only the specified pr|issue (separated by commas)")
        parser.add_argument("--repo", "-r", type=str, required=True,
                            help="Github repo to triage (defaults to all)")
        return parser

    def run(self):
        # create the fileindexer
        fi_cache = '/tmp/ansibullbot/cache/{}.files.checkout'.format(self.repo)
        fi_cache = os.path.expanduser(fi_cache)
        self.file_indexer = FileIndexer(botmetafile=self.botmetafile,
                                        checkoutdir=fi_cache,
                                        repo=self.repo)
        self.file_indexer.update()

        # make a repo object for the github api
        repo = self.ghw.get_repo(self.repo)

        # map for issue type to label
        try:
            label_map = repo.get_label_map()
        except UnknownObjectException:
            label_map = {}

        # collect issues
        if not self.number:
            issues = repo.get_issues()
        else:
            issue = repo.get_issue(int(self.number))
            issues = [issue]

        # iterate through issues and apply actions
        for issue in issues:
            logging.info('triaging %s' % issue.html_url)
            actions = DefaultActions()

            # wrap the issue for extra magic
            cachedir = os.path.join(self.cachedir_base, self.repo)
            iw = IssueWrapper(github=self.ghw, repo=repo, issue=issue,
                              cachedir=cachedir, file_indexer=self.file_indexer)

            # what did the submitter provide in the body?
            td = iw.template_data
            missing = iw.missing_template_sections
            if missing and 'needs_template' not in iw.labels:
                actions.newlabel.append('needs_template')

            # what type of issue is this?
            if 'issue type' in td:
                mapped_label = label_map.get(td['issue type'])
                if mapped_label:
                    if mapped_label not in iw.labels:
                        actions.newlabel.append(mapped_label)

            pprint(vars(actions))
            self.apply_actions(iw, actions)
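# A minimal, assumed entrypoint sketch: create_parser() implies argparse-driven
# construction, and run() performs one triage pass. How the parsed args reach
# __init__ is internal to DefaultTriager, so this is only a plausible
# invocation, not the project's actual launcher script.
if __name__ == '__main__':
    triager = SimpleTriager()  # parses --repo/--pr via its base class (assumed)
    triager.run()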
import json

import requests

# FileIndexer, extract_template_sections, runscript, get_headers,
# run_template_extract, get_migrated_issue, find_component and render_body
# are assumed to be defined or imported elsewhere in this script's module.


def main():
    # need a file indexer to get the template
    FI = FileIndexer(checkoutdir='/tmp/fileindexer')
    FI.update()

    # get the expected sections
    tf_content = FI.get_file_content('.github/ISSUE_TEMPLATE.md')
    tf_sections = extract_template_sections(tf_content, header='#####')
    required_sections = [x.lower() for x in tf_sections.keys() if tf_sections[x]['required']]
    if not required_sections:
        required_sections = ['issue type', 'component name', 'ansible version', 'summary']
    section_order = list(tf_sections.items())
    section_order = sorted(section_order, key=lambda x: x[1]['index'])
    section_order = [x[0] for x in section_order]

    # all known possibilities
    section_names = ['PLUGIN NAME', 'ANSIBLE CONFIGURATION'] + section_order + ['ENVIRONMENT']

    # get the numbers
    script = "#!/bin/bash\n"
    script += "\n"
    script += "URL='https://github.com/ansible/ansible/issues?utf8=%E2%9C%93&q=is%3Aopen%20label%3Aneeds_template%20author%3Aansibot'\n"
    script += "PYTHONPATH=$(pwd) scripts/scrape_github_issues_url $URL\n"
    (rc, so, se) = runscript(script)
    numbers = json.loads(so)
    numbers = sorted(set(numbers))

    for idn, number in enumerate(numbers):
        print('{} {}|{}'.format(number, idn, len(numbers)))
        fixed = []

        iurl = 'https://api.github.com/repos/ansible/ansible/issues/{}'.format(number)
        irr = requests.get(iurl, headers=get_headers())
        idata = irr.json()

        curl = idata['comments_url']
        crr = requests.get(curl, headers=get_headers())
        comments = crr.json()
        if crr.links:
            print('paginated comments')
            nextp = [x for x in crr.links.items() if x[1]['rel'] == 'next'][0][1]['url']
            while nextp:
                nrr = requests.get(nextp, headers=get_headers())
                comments += nrr.json()
                try:
                    nextp = [x for x in nrr.links.items() if x[1]['rel'] == 'next'][0][1]['url']
                except IndexError:
                    nextp = None
                #import epdb; epdb.st()

        newbody = idata['body']

        # extract
        ts = run_template_extract(FI, newbody, number, 'issue', section_names)

        # cleanup
        if 'environment' in ts:
            ts['os / environment'] = ts['environment']
            ts.pop('environment', None)

        # what is missing?
        missing = [x for x in required_sections if x.lower() not in ts]
        if not missing:
            print('{} nothing missing'.format(number))
            continue

        # simple sed for this one
        if missing == ['component name'] and 'plugin name' in newbody.lower():
            if 'PLUGIN NAME' in newbody:
                newbody = newbody.replace('PLUGIN NAME', 'COMPONENT NAME')
            if 'Plugin Name' in newbody:
                newbody = newbody.replace('Plugin Name', 'Component Name')
            if 'plugin name' in newbody:
                newbody = newbody.replace('plugin name', 'component name')
            print('{} sed/plugin name/component name'.format(number))
            cr = requests.patch(iurl, headers=get_headers(), data=json.dumps({'body': newbody}))
            if cr.status_code != 200:
                print('failed to edit body {}'.format(idata['html_url']))
                import epdb; epdb.st()
            continue

        if 'summary' in missing:
            ts['summary'] = newbody
            missing.remove('summary')
            fixed.append('summary')

        if 'issue type' in missing:
            # get migrated issue
            try:
                mi = get_migrated_issue(idata['body'])
            except Exception as e:
                print(e)
                mi = None
            if mi:
                itype = None
                # get issue type label from migrated issue
                mi_labels = [x['name'] for x in mi['labels']]
                if 'bug_report' in mi_labels:
                    itype = 'Bug Report'
                elif 'feature_idea' in mi_labels:
                    itype = 'Feature Idea'
                elif 'docs_report' in mi_labels:
                    itype = 'Documentation Report'
                if itype is not None:
                    ts['issue type'] = itype
                    missing.remove('issue type')
                    fixed.append('issue type')

        if 'component name' in missing:
            component = find_component(idata, ts, newbody, comments)
            if component:
                missing.remove('component name')
                ts['component name'] = component
                fixed.append('component name')

        if 'ansible version' in missing:
            labels = [x['name'] for x in idata['labels']]
            labels = [x for x in labels if x.startswith('affects_')]
            labels = sorted(set(labels))
            if labels:
                version = labels[0].replace('affects_', '')
            else:
                version = "N/A"
            missing.remove('ansible version')
            ts['ansible version'] = version
            fixed.append('ansible version')

        if not missing:
            print('# {}'.format(idata['html_url']))
            print('# title: {}'.format(idata['title']))
            print('# component: {}'.format(ts['component name']))
            print('# version: {}'.format(ts['ansible version']))
            print('# fixed: {}'.format(fixed))
            newbody = render_body(ts, section_order)
            print('<====================================================>')
            print(newbody)
            print('<====================================================>')
            import epdb; epdb.st()
            cr = requests.patch(iurl, headers=get_headers(), data=json.dumps({'body': newbody}))
            if cr.status_code != 200:
                print('failed to edit body {}'.format(idata['html_url']))
                import epdb; epdb.st()
            continue

        print('no solution(s) for {} {}'.format(idata['html_url'], missing))

    print('DONE')
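# Assumed entrypoint guard (not shown in the original snippet); the script
# takes no arguments and walks every open needs_template issue it scrapes.
if __name__ == '__main__':
    main()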
import datetime
import logging
import os
import re

# Non-stdlib names used below (FileIndexer, GitRepoWrapper, ModuleExtractor,
# BotMetadataParser, run_command, jaro_winkler) are assumed to come from the
# surrounding ansibullbot package; this fragment keeps the original py2-era
# idioms (unicode, str.encode on text).


class AnsibleComponentMatcher(object):

    BOTMETA = {}
    INDEX = {}
    REPO = 'https://github.com/ansible/ansible'
    STOPWORDS = ['ansible', 'core', 'plugin']
    STOPCHARS = ['"', "'", '(', ')', '?', '*', '`', ',', ':', '-']
    BLACKLIST = ['new module', 'new modules']
    FILE_NAMES = []
    MODULES = {}
    MODULE_NAMES = []
    MODULE_NAMESPACE_DIRECTORIES = []

    # FIXME: THESE NEED TO GO INTO BOTMETA
    # ALSO SEE search_by_regex_generic ...
    KEYWORDS = {
        'all': None,
        'ansiballz': 'lib/ansible/executor/module_common.py',
        'ansible-console': 'lib/ansible/cli/console.py',
        'ansible-galaxy': 'lib/ansible/galaxy',
        'ansible-inventory': 'lib/ansible/cli/inventory.py',
        'ansible-playbook': 'lib/ansible/playbook',
        'ansible playbook': 'lib/ansible/playbook',
        'ansible playbooks': 'lib/ansible/playbook',
        'ansible-pull': 'lib/ansible/cli/pull.py',
        'ansible-vault': 'lib/ansible/parsing/vault',
        'ansible-vault edit': 'lib/ansible/parsing/vault',
        'ansible-vault show': 'lib/ansible/parsing/vault',
        'ansible-vault decrypt': 'lib/ansible/parsing/vault',
        'ansible-vault encrypt': 'lib/ansible/parsing/vault',
        'async': 'lib/ansible/modules/utilities/logic/async_wrapper.py',
        'become': 'lib/ansible/playbook/become.py',
        'block': 'lib/ansible/playbook/block.py',
        'blocks': 'lib/ansible/playbook/block.py',
        'callback plugin': 'lib/ansible/plugins/callback',
        'callback plugins': 'lib/ansible/plugins/callback',
        'conditional': 'lib/ansible/playbook/conditional.py',
        'docs': 'docs',
        'delegate_to': 'lib/ansible/playbook/task.py',
        'facts': 'lib/ansible/module_utils/facts',
        'galaxy': 'lib/ansible/galaxy',
        'groupvars': 'lib/ansible/vars/hostvars.py',
        'group vars': 'lib/ansible/vars/hostvars.py',
        'handlers': 'lib/ansible/playbook/handler.py',
        'hostvars': 'lib/ansible/vars/hostvars.py',
        'host vars': 'lib/ansible/vars/hostvars.py',
        'integration tests': 'test/integration',
        'inventory script': 'contrib/inventory',
        'jinja2 template system': 'lib/ansible/template',
        'module_utils': 'lib/ansible/module_utils',
        'multiple modules': None,
        'new module(s) request': None,
        'new modules request': None,
        'new module request': None,
        'new module': None,
        'network_cli': 'lib/ansible/plugins/connection/network_cli.py',
        'network_cli.py': 'lib/ansible/plugins/connection/network_cli.py',
        'network modules': 'lib/ansible/modules/network',
        'paramiko': 'lib/ansible/plugins/connection/paramiko_ssh.py',
        'role': 'lib/ansible/playbook/role',
        'roles': 'lib/ansible/playbook/role',
        'ssh': 'lib/ansible/plugins/connection/ssh.py',
        'ssh authentication': 'lib/ansible/plugins/connection/ssh.py',
        'setup / facts': 'lib/ansible/modules/system/setup.py',
        'setup': 'lib/ansible/modules/system/setup.py',
        'task executor': 'lib/ansible/executor/task_executor.py',
        'testing': 'test/',
        'validate-modules': 'test/sanity/validate-modules',
        'vault': 'lib/ansible/parsing/vault',
        'vault edit': 'lib/ansible/parsing/vault',
        'vault documentation': 'lib/ansible/parsing/vault',
        'with_items': 'lib/ansible/playbook/loop_control.py',
        'windows modules': 'lib/ansible/modules/windows',
        'winrm': 'lib/ansible/plugins/connection/winrm.py'
    }

    def __init__(self, gitrepo=None, botmetafile=None, cachedir=None, email_cache=None, file_indexer=None):
        self.cachedir = cachedir
        self.botmetafile = botmetafile
        self.email_cache = email_cache

        if file_indexer:
            self.file_indexer = file_indexer
        else:
            self.file_indexer = FileIndexer(
                botmetafile=self.botmetafile,
                checkoutdir=self.cachedir
            )

        if gitrepo:
            self.gitrepo = gitrepo
        else:
            self.gitrepo = GitRepoWrapper(cachedir=self.cachedir, repo=self.REPO)

        self.strategy = None
        self.strategies = []

        self.indexed_at = False
        self.updated_at = None
        self.update()

    def update(self, email_cache=None):
        if email_cache:
            self.email_cache = email_cache
        self.gitrepo.update()
        self.index_files()
        self.indexed_at = datetime.datetime.now()
        self.cache_keywords()
        self.updated_at = datetime.datetime.now()

    def index_files(self):
        self.BOTMETA = {}
        self.MODULES = {}
        self.MODULE_NAMES = []
        self.MODULE_NAMESPACE_DIRECTORIES = []

        self.load_meta()

        for fn in self.gitrepo.module_files:
            if os.path.isdir(fn):
                continue
            mname = os.path.basename(fn)
            mname = mname.replace('.py', '').replace('.ps1', '')
            if mname.startswith('__'):
                continue
            mdata = {
                'name': mname,
                'repo_filename': fn,
                'filename': fn
            }
            if fn not in self.MODULES:
                self.MODULES[fn] = mdata.copy()
            else:
                self.MODULES[fn].update(mdata)

        self.MODULE_NAMESPACE_DIRECTORIES = [os.path.dirname(x) for x in self.gitrepo.module_files]
        self.MODULE_NAMESPACE_DIRECTORIES = sorted(set(self.MODULE_NAMESPACE_DIRECTORIES))

        # make a list of names by enumerating the files
        self.MODULE_NAMES = [os.path.basename(x) for x in self.gitrepo.module_files]
        self.MODULE_NAMES = [x for x in self.MODULE_NAMES if x.endswith('.py') or x.endswith('.ps1')]
        self.MODULE_NAMES = [x.replace('.ps1', '').replace('.py', '') for x in self.MODULE_NAMES]
        self.MODULE_NAMES = [x for x in self.MODULE_NAMES if not x.startswith('__')]
        self.MODULE_NAMES = sorted(set(self.MODULE_NAMES))

        # make a list of names by calling ansible-doc
        checkoutdir = self.gitrepo.checkoutdir
        checkoutdir = os.path.abspath(checkoutdir)
        cmd = '. {}/hacking/env-setup; ansible-doc -t module -F'.format(checkoutdir)
        logging.debug(cmd)
        (rc, so, se) = run_command(cmd, cwd=checkoutdir)
        if rc:
            raise Exception("'ansible-doc' command failed (%s, %s %s)" % (rc, so, se))
        lines = so.split('\n')
        for line in lines:
            parts = line.split()
            parts = [x.strip() for x in parts]
            if len(parts) != 2 or checkoutdir not in line:
                continue
            mname = parts[0]
            if mname not in self.MODULE_NAMES:
                self.MODULE_NAMES.append(mname)
            fpath = parts[1]
            fpath = fpath.replace(checkoutdir + '/', '')
            if fpath not in self.MODULES:
                self.MODULES[fpath] = {
                    'name': mname,
                    'repo_filename': fpath,
                    'filename': fpath
                }

        _modules = self.MODULES.copy()
        for k, v in _modules.items():
            kparts = os.path.splitext(k)
            if kparts[-1] == '.ps1':
                _k = kparts[0] + '.py'
                checkpath = os.path.join(checkoutdir, _k)
                if not os.path.isfile(checkpath):
                    _k = k
            else:
                _k = k
            ME = ModuleExtractor(os.path.join(checkoutdir, _k), email_cache=self.email_cache)
            if k not in self.BOTMETA['files']:
                self.BOTMETA['files'][k] = {
                    'deprecated': os.path.basename(k).startswith('_'),
                    'labels': os.path.dirname(k).split('/'),
                    'authors': ME.authors,
                    'maintainers': ME.authors,
                    'maintainers_keys': [],
                    'notified': ME.authors,
                    'ignored': [],
                    'support': ME.metadata.get('supported_by', 'community'),
                    'metadata': ME.metadata.copy()
                }
            else:
                bmeta = self.BOTMETA['files'][k].copy()
                bmeta['metadata'] = ME.metadata.copy()
                if 'notified' not in bmeta:
                    bmeta['notified'] = []
                if 'maintainers' not in bmeta:
                    bmeta['maintainers'] = []
                if not bmeta.get('supported_by'):
                    bmeta['supported_by'] = ME.metadata.get('supported_by', 'community')
                if 'authors' not in bmeta:
                    bmeta['authors'] = []
                for x in ME.authors:
                    if x not in bmeta['authors']:
                        bmeta['authors'].append(x)
                    if x not in bmeta['maintainers']:
                        bmeta['maintainers'].append(x)
                    if x not in bmeta['notified']:
                        bmeta['notified'].append(x)
                if not bmeta.get('labels'):
                    bmeta['labels'] = os.path.dirname(k).split('/')
                bmeta['deprecated'] = os.path.basename(k).startswith('_')
                self.BOTMETA['files'][k].update(bmeta)

            # clean out the ignorees
            if 'ignored' in self.BOTMETA['files'][k]:
                for ignoree in self.BOTMETA['files'][k]['ignored']:
                    for thiskey in ['maintainers', 'notified']:
                        while ignoree in self.BOTMETA['files'][k][thiskey]:
                            self.BOTMETA['files'][k][thiskey].remove(ignoree)

            # write back to the modules
            self.MODULES[k].update(self.BOTMETA['files'][k])

    def load_meta(self):
        if self.botmetafile is not None:
            with open(self.botmetafile, 'rb') as f:
                rdata = f.read()
        else:
            fp = '.github/BOTMETA.yml'
            rdata = self.gitrepo.get_file_content(fp)
        self.BOTMETA = BotMetadataParser.parse_yaml(rdata)

    def cache_keywords(self):
        for k, v in self.BOTMETA['files'].items():
            if not v.get('keywords'):
                continue
            for kw in v['keywords']:
                if kw not in self.KEYWORDS:
                    self.KEYWORDS[kw] = k

    def clean_body(self, body, internal=False):
        body = body.lower()
        body = body.strip()
        for SC in self.STOPCHARS:
            if body.startswith(SC):
                body = body.lstrip(SC)
                body = body.strip()
            if body.endswith(SC):
                body = body.rstrip(SC)
                body = body.strip()
            if internal and SC in body:
                body = body.replace(SC, '')
                body = body.strip()
        body = body.strip()
        return body

    def match(self, issuewrapper):
        iw = issuewrapper
        matchdata = self.match_components(
            iw.title,
            iw.body,
            iw.template_data.get('component_raw'),
            files=iw.files
        )
        return matchdata

    def match_components(self, title, body, component, files=None):
        """Make a list of matching files with metadata"""
        self.strategy = None
        self.strategies = []

        # No matching necessary for PRs, but should provide consistent api
        if files:
            matched_filenames = files[:]
        else:
            matched_filenames = []
            if component is None:
                return matched_filenames

            component = component.encode('ascii', 'ignore')
            logging.debug('match "{}"'.format(component))

            delimiters = ['\n', ',', ' + ', ' & ']
            delimited = False
            for delimiter in delimiters:
                if delimiter in component:
                    delimited = True
                    components = component.split(delimiter)
                    for _component in components:
                        _matches = self._match_component(title, body, _component)
                        self.strategies.append(self.strategy)

                        # bypass for blacklist
                        if None in _matches:
                            _matches = []

                        matched_filenames += _matches

                    # do not process any more delimiters
                    break

            if not delimited:
                matched_filenames += self._match_component(title, body, component)
                self.strategies.append(self.strategy)

                # bypass for blacklist
                if None in matched_filenames:
                    return []

        # reduce subpaths
        if matched_filenames:
            matched_filenames = self.reduce_filepaths(matched_filenames)

        # create metadata for each matched file
        component_matches = []
        matched_filenames = sorted(set(matched_filenames))
        for fn in matched_filenames:
            component_matches.append(self.get_meta_for_file(fn))

        return component_matches

    def _match_component(self, title, body, component):
        """Find matches for a single line"""
        matched_filenames = []

        # context sets the path prefix to narrow the search window
        if 'module_util' in title.lower() or 'module_util' in component.lower():
            context = 'lib/ansible/module_utils'
        elif 'module util' in title.lower() or 'module util' in component.lower():
            context = 'lib/ansible/module_utils'
        elif 'module' in title.lower() or 'module' in component.lower():
            context = 'lib/ansible/modules'
        elif 'dynamic inventory' in title.lower() or 'dynamic inventory' in component.lower():
            context = 'contrib/inventory'
        elif 'inventory script' in title.lower() or 'inventory script' in component.lower():
            context = 'contrib/inventory'
        elif 'inventory plugin' in title.lower() or 'inventory plugin' in component.lower():
            context = 'lib/ansible/plugins/inventory'
        else:
            context = None

        if not component:
            return []

        if component not in self.STOPWORDS and component not in self.STOPCHARS:

            if not matched_filenames:
                matched_filenames += self.search_by_keywords(component, exact=True)
                if matched_filenames:
                    self.strategy = 'search_by_keywords'

            if not matched_filenames:
                matched_filenames += self.search_by_module_name(component)
                if matched_filenames:
                    self.strategy = 'search_by_module_name'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_module_globs(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_module_globs'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_modules(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_modules'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_generic(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_generic'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_urls(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_urls'

            if not matched_filenames:
                matched_filenames += self.search_by_tracebacks(component)
                if matched_filenames:
                    self.strategy = 'search_by_tracebacks'

            if not matched_filenames:
                matched_filenames += self.search_by_filepath(component, context=context)
                if matched_filenames:
                    self.strategy = 'search_by_filepath'

            if not matched_filenames:
                matched_filenames += self.search_by_filepath(component, partial=True)
                if matched_filenames:
                    self.strategy = 'search_by_filepath[partial]'

            if not matched_filenames:
                matched_filenames += self.search_by_keywords(component, exact=False)
                if matched_filenames:
                    self.strategy = 'search_by_keywords!exact'

            if matched_filenames:
                matched_filenames += self.include_modules_from_test_targets(matched_filenames)

        return matched_filenames

    def search_by_module_name(self, component):
        matches = []

        component = self.clean_body(component)

        # docker-container vs. docker_container
        if component not in self.MODULE_NAMES:
            component = component.replace('-', '_')

        if component in self.MODULE_NAMES:
            mmatch = self.find_module_match(component)
            if mmatch:
                if isinstance(mmatch, list):
                    for x in mmatch:
                        matches.append(x['repo_filename'])
                else:
                    matches.append(mmatch['repo_filename'])

        return matches

    def search_by_keywords(self, component, exact=True):
        """Simple keyword search"""
        component = component.lower()
        matches = []
        if component in self.STOPWORDS:
            matches = [None]
        elif component in self.KEYWORDS:
            matches = [self.KEYWORDS[component]]
        elif not exact:
            for k, v in self.KEYWORDS.items():
                if ' ' + k + ' ' in component or ' ' + k + ' ' in component.lower():
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif ' ' + k + ':' in component or ' ' + k + ':' in component.lower():
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif component.endswith(' ' + k) or component.lower().endswith(' ' + k):
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif (k in component or k in component.lower()) and k in self.BLACKLIST:
                    logging.debug('blacklist match: {}'.format(k))
                    matches.append(None)
        return matches

    def search_by_regex_urls(self, body):
        # http://docs.ansible.com/ansible/latest/copy_module.html
        # http://docs.ansible.com/ansible/latest/dev_guide/developing_modules.html
        # http://docs.ansible.com/ansible/latest/postgresql_db_module.html
        # [helm module](https//docs.ansible.com/ansible/2.4/helm_module.html)
        # Windows module: win_robocopy\nhttp://docs.ansible.com/ansible/latest/win_robocopy_module.html
        # Examples:\n* archive (https://docs.ansible.com/ansible/archive_module.html)\n* s3_sync (https://docs.ansible.com/ansible/s3_sync_module.html)
        # https//github.com/ansible/ansible/blob/devel/lib/ansible/modules/windows/win_dsc.ps1L228

        matches = []

        urls = re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            body
        )
        if urls:
            for url in urls:
                url = url.rstrip(')')
                if '/blob' in url and url.endswith('.py'):
                    parts = url.split('/')
                    bindex = parts.index('blob')
                    fn = '/'.join(parts[bindex+2:])
                    matches.append(fn)
                elif '_module.html' in url:
                    parts = url.split('/')
                    fn = parts[-1].replace('_module.html', '')
                    choices = [x for x in self.gitrepo.files if '/' + fn in x or '/_' + fn in x]
                    choices = [x for x in choices if 'lib/ansible/modules' in x]
                    if len(choices) > 1:
                        choices = [x for x in choices if '/' + fn + '.py' in x or '/' + fn + '.ps1' in x or '/_' + fn + '.py' in x]
                    if not choices:
                        pass
                    elif len(choices) == 1:
                        matches.append(choices[0])
                    else:
                        pass
                else:
                    pass

        return matches

    def search_by_regex_modules(self, body):
        # foo module
        # foo and bar modules
        # foo* modules
        # foo* module
        body = body.lower()
        logging.debug('attempt regex match on: {}'.format(body))

        # https://www.tutorialspoint.com/python/python_reg_expressions.htm
        patterns = [
            r'\:\n(\S+)\.py',
            r'(\S+)\.py',
            r'\-(\s+)(\S+)(\s+)module',
            r'\`ansible_module_(\S+)\.py\`',
            r'module(\s+)\-(\s+)(\S+)',
            r'module(\s+)(\S+)',
            r'\`(\S+)\`(\s+)module',
            r'(\S+)(\s+)module',
            r'the (\S+) command',
            r'(\S+) \(.*\)',
            r'(\S+)\-module',
            r'modules/(\S+)',
            r'module\:(\s+)\`(\S+)\`',
            r'module\: (\S+)',
            r'module (\S+)',
            r'module `(\S+)`',
            r'module: (\S+)',
            r'new (\S+) module',
            r'the (\S+) module',
            r'the \"(\S+)\" module',
            r':\n(\S+) module',
            r'(\S+) module',
            r'(\S+) core module',
            r'(\S+) extras module',
            r':\n\`(\S+)\` module',
            r'\`(\S+)\` module',
            r'`(\S+)` module',
            r'(\S+)\* modules',
            r'(\S+) and (\S+)',
            r'(\S+) or (\S+)',
            r'(\S+) \+ (\S+)',
            r'(\S+) \& (\S)',
            r'(\S+) and (\S+) modules',
            r'(\S+) or (\S+) module',
            r'(\S+)_module',
            r'action: (\S+)',
            r'action (\S+)',
            r'ansible_module_(\S+)\.py',
            r'ansible_module_(\S+)',
            r'ansible_modules_(\S+)\.py',
            r'ansible_modules_(\S+)',
            r'(\S+) task',
            r'(\s+)\((\S+)\)',
            r'(\S+)(\s+)(\S+)(\s+)modules',
            r'(\S+)(\s+)module\:(\s+)(\S+)',
            r'\-(\s+)(\S+)(\s+)module',
            r'\:(\s+)(\S+)(\s+)module',
            r'\-(\s+)ansible(\s+)(\S+)(\s+)(\S+)(\s+)module',
            r'.*(\s+)(\S+)(\s+)module.*'
        ]

        matches = []

        logging.debug('check patterns against: {}'.format(body))

        for pattern in patterns:
            mobj = re.match(pattern, body, re.M | re.I)
            if mobj:
                logging.debug('pattern {} matched on "{}"'.format(pattern, body))

                for x in range(0, mobj.lastindex+1):
                    try:
                        mname = mobj.group(x)
                        logging.debug('mname: {}'.format(mname))
                        if mname == body:
                            continue
                        mname = self.clean_body(mname)
                        if not mname.strip():
                            continue
                        mname = mname.strip().lower()
                        if ' ' in mname:
                            continue
                        if '/' in mname:
                            continue

                        mname = mname.replace('.py', '').replace('.ps1', '')
                        logging.debug('--> {}'.format(mname))

                        # attempt to match a module
                        module_match = self.find_module_match(mname)

                        if not module_match:
                            pass
                        elif isinstance(module_match, list):
                            for m in module_match:
                                matches.append(m['repo_filename'])
                        elif isinstance(module_match, dict):
                            matches.append(module_match['repo_filename'])
                    except Exception as e:
                        logging.error(e)

                if matches:
                    break

        return matches

    def search_by_regex_module_globs(self, body):
        # All AWS modules
        # BigIP modules
        # NXOS modules
        # azurerm modules

        matches = []
        body = self.clean_body(body)
        logging.debug('try globs on: {}'.format(body))

        keymap = {
            'all': None,
            'ec2': 'lib/ansible/modules/cloud/amazon',
            'ec2_*': 'lib/ansible/modules/cloud/amazon',
            'aws': 'lib/ansible/modules/cloud/amazon',
            'amazon': 'lib/ansible/modules/cloud/amazon',
            'google': 'lib/ansible/modules/cloud/google',
            'gce': 'lib/ansible/modules/cloud/google',
            'gcp': 'lib/ansible/modules/cloud/google',
            'bigip': 'lib/ansible/modules/network/f5',
            'nxos': 'lib/ansible/modules/network/nxos',
            'azure': 'lib/ansible/modules/cloud/azure',
            'azurerm': 'lib/ansible/modules/cloud/azure',
            'openstack': 'lib/ansible/modules/cloud/openstack',
            'ios': 'lib/ansible/modules/network/ios',
        }

        regexes = [
            r'(\S+) ansible modules',
            r'all (\S+) based modules',
            r'all (\S+) modules',
            r'.* all (\S+) modules.*',
            r'(\S+) modules',
            r'(\S+\*) modules',
            r'all cisco (\S+\*) modules',
        ]

        mobj = None
        for x in regexes:
            mobj = re.match(x, body)
            if mobj:
                logging.debug('matched glob: {}'.format(x))
                break

        if not mobj:
            logging.debug('no glob matches')

        if mobj:
            keyword = mobj.group(1)
            if not keyword.strip():
                pass
            elif keyword in keymap:
                if keymap[keyword]:
                    matches.append(keymap[keyword])
            else:
                if '*' in keyword:
                    keyword = keyword.replace('*', '')

                # check for directories first
                fns = [x for x in self.MODULE_NAMESPACE_DIRECTORIES if keyword in x]

                # check for files second
                if not fns:
                    fns = [x for x in self.gitrepo.module_files
                           if 'lib/ansible/modules' in x and keyword in x]

                if fns:
                    matches += fns

        if matches:
            matches = sorted(set(matches))

        return matches

    def search_by_regex_generic(self, body):
        # foo dynamic inventory script
        # foo filter

        # https://www.tutorialspoint.com/python/python_reg_expressions.htm
        patterns = [
            [r'(.*) action plugin', 'lib/ansible/plugins/action'],
            [r'(.*) inventory plugin', 'lib/ansible/plugins/inventory'],
            [r'(.*) dynamic inventory', 'contrib/inventory'],
            [r'(.*) dynamic inventory (script|file)', 'contrib/inventory'],
            [r'(.*) inventory script', 'contrib/inventory'],
            [r'(.*) filter', 'lib/ansible/plugins/filter'],
            [r'(.*) jinja filter', 'lib/ansible/plugins/filter'],
            [r'(.*) jinja2 filter', 'lib/ansible/plugins/filter'],
            [r'(.*) template filter', 'lib/ansible/plugins/filter'],
            [r'(.*) fact caching plugin', 'lib/ansible/plugins/cache'],
            [r'(.*) fact caching module', 'lib/ansible/plugins/cache'],
            [r'(.*) lookup plugin', 'lib/ansible/plugins/lookup'],
            [r'(.*) lookup', 'lib/ansible/plugins/lookup'],
            [r'(.*) callback plugin', 'lib/ansible/plugins/callback'],
            [r'(.*)\.py callback', 'lib/ansible/plugins/callback'],
            [r'callback plugin (.*)', 'lib/ansible/plugins/callback'],
            [r'(.*) stdout callback', 'lib/ansible/plugins/callback'],
            [r'stdout callback (.*)', 'lib/ansible/plugins/callback'],
            [r'stdout_callback (.*)', 'lib/ansible/plugins/callback'],
            [r'(.*) connection plugin', 'lib/ansible/plugins/connection'],
            [r'(.*) connection type', 'lib/ansible/plugins/connection'],
            [r'(.*) connection', 'lib/ansible/plugins/connection'],
            [r'(.*) transport', 'lib/ansible/plugins/connection'],
            [r'connection=(.*)', 'lib/ansible/plugins/connection'],
            [r'connection: (.*)', 'lib/ansible/plugins/connection'],
            [r'connection (.*)', 'lib/ansible/plugins/connection'],
            [r'strategy (.*)', 'lib/ansible/plugins/strategy'],
            [r'(.*) strategy plugin', 'lib/ansible/plugins/strategy'],
            [r'(.*) module util', 'lib/ansible/module_utils'],
            [r'ansible-galaxy (.*)', 'lib/ansible/galaxy'],
            [r'ansible-playbook (.*)', 'lib/ansible/playbook'],
            [r'ansible/module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'lib/ansible/module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'(\S+) documentation fragment', 'lib/ansible/utils/module_docs_fragments'],
        ]

        body = self.clean_body(body)

        matches = []

        for pattern in patterns:
            mobj = re.match(pattern[0], body, re.M | re.I)
            if mobj:
                logging.debug('pattern hit: {}'.format(pattern))
                fname = mobj.group(1)
                fname = fname.lower()

                fpath = os.path.join(pattern[1], fname)

                if fpath in self.gitrepo.files:
                    matches.append(fpath)
                elif os.path.join(pattern[1], fname + '.py') in self.gitrepo.files:
                    fname = os.path.join(pattern[1], fname + '.py')
                    matches.append(fname)
                else:
                    # fallback to the directory
                    matches.append(pattern[1])

        return matches

    def search_by_tracebacks(self, body):
        matches = []

        if 'Traceback (most recent call last)' in body:
            lines = body.split('\n')
            for line in lines:
                line = line.strip()
                if line.startswith('DistributionNotFound'):
                    matches = ['setup.py']
                    break
                elif line.startswith('File'):
                    fn = line.split()[1]
                    for SC in self.STOPCHARS:
                        fn = fn.replace(SC, '')

                    if 'ansible_module_' in fn:
                        fn = os.path.basename(fn)
                        fn = fn.replace('ansible_module_', '')
                        matches = [fn]
                    elif 'cli/playbook.py' in fn:
                        fn = 'lib/ansible/cli/playbook.py'
                    elif 'module_utils' in fn:
                        idx = fn.find('module_utils/')
                        fn = 'lib/ansible/' + fn[idx:]
                    elif 'ansible/' in fn:
                        idx = fn.find('ansible/')
                        fn1 = fn[idx:]

                        if 'bin/' in fn1:
                            if not fn1.startswith('bin'):
                                idx = fn1.find('bin/')
                                fn1 = fn1[idx:]
                            if fn1.endswith('.py'):
                                fn1 = fn1.rstrip('.py')
                        elif 'cli/' in fn1:
                            idx = fn1.find('cli/')
                            fn1 = fn1[idx:]
                            fn1 = 'lib/ansible/' + fn1
                        elif 'lib' not in fn1:
                            fn1 = 'lib/' + fn1

                        if fn1 not in self.gitrepo.files:
                            pass

        return matches

    def search_by_filepath(self, body, partial=False, context=None):
        """Find known filepaths in body"""
        matches = []

        body = self.clean_body(body)

        if not body:
            return []
        if body.lower() in self.STOPCHARS:
            return []
        if body.lower() in self.STOPWORDS:
            return []

        # 'inventory manager' vs. 'inventory/manager'
        if partial and ' ' in body:
            body = body.replace(' ', '/')

        if 'site-packages' in body:
            res = re.match('(.*)/site-packages/(.*)', body)
            body = res.group(2)
        if 'modules/core/' in body:
            body = body.replace('modules/core/', 'modules/')
        if 'modules/extras/' in body:
            body = body.replace('modules/extras/', 'modules/')
        if 'ansible-modules-core/' in body:
            body = body.replace('ansible-modules-core/', '/')
        if 'ansible-modules-extras/' in body:
            body = body.replace('ansible-modules-extras/', '/')
        if body.startswith('ansible/lib/ansible'):
            body = body.replace('ansible/lib', 'lib')
        if body.startswith('ansible/') and not body.startswith('ansible/modules'):
            body = body.replace('ansible/', '', 1)
        if 'module/' in body:
            body = body.replace('module/', 'modules/')

        logging.debug('search filepath [{}] [{}]: {}'.format(context, partial, body))

        if len(body) < 2:
            return []

        if '/' in body:
            body_paths = body.split('/')
        elif ' ' in body:
            body_paths = body.split()
            body_paths = [x.strip() for x in body_paths if x.strip()]
        else:
            body_paths = [body]

        if 'networking' in body_paths:
            ix = body_paths.index('networking')
            body_paths[ix] = 'network'

        if 'plugin' in body_paths:
            ix = body_paths.index('plugin')
            body_paths[ix] = 'plugins'

        if not context or 'lib/ansible/modules' in context:
            mmatch = self.find_module_match(body)
            if mmatch:
                if isinstance(mmatch, list) and len(mmatch) > 1:
                    # only allow for exact prefix globbing here ...
                    if [x for x in mmatch if x['repo_filename'].startswith(body)]:
                        return [x['repo_filename'] for x in mmatch]
                elif isinstance(mmatch, list):
                    return [x['repo_filename'] for x in mmatch]
                else:
                    return [mmatch['repo_filename']]

        if body in self.gitrepo.files:
            matches = [body]
        else:
            for fn in self.gitrepo.files:
                # limit the search set if a context is given
                if context is not None and not fn.startswith(context):
                    continue

                if fn.endswith(body) or fn.endswith(body + '.py') or fn.endswith(body + '.ps1'):
                    # ios_config.py -> test_ios_config.py vs. ios_config.py
                    bn1 = os.path.basename(body)
                    bn2 = os.path.basename(fn)
                    if bn2.startswith(bn1):
                        matches = [fn]
                        break

                if partial:
                    # netapp_e_storagepool storage module
                    # lib/ansible/modules/storage/netapp/netapp_e_storagepool.py

                    # if all subpaths are in this filepath, it is a match
                    bp_total = 0
                    fn_paths = fn.split('/')
                    fn_paths.append(fn_paths[-1].replace('.py', '').replace('.ps1', ''))
                    for bp in body_paths:
                        if bp in fn_paths:
                            bp_total += 1

                    if bp_total == len(body_paths):
                        matches = [fn]
                        break
                    elif bp_total > 1:
                        if (float(bp_total) / float(len(body_paths))) >= (2.0 / 3.0):
                            if fn not in matches:
                                matches.append(fn)

        if matches:
            tr = []
            for match in matches[:]:
                # reduce to longest path
                for m in matches:
                    if match == m:
                        continue
                    if len(m) < len(match) and match.startswith(m):
                        tr.append(m)

            for r in tr:
                if r in matches:
                    logging.debug('trimming {}'.format(r))
                    matches.remove(r)

        matches = sorted(set(matches))
        logging.debug('return: {}'.format(matches))

        return matches

    def reduce_filepaths(self, matches):

        # unique
        _matches = []
        for _match in matches:
            if _match not in _matches:
                _matches.append(_match)
        matches = _matches[:]

        # squash to longest path
        if matches:
            tr = []
            for match in matches[:]:
                # reduce to longest path
                for m in matches:
                    if match == m:
                        continue
                    if m is None or match is None:
                        continue
                    if len(m) < len(match) and (match.startswith(m) or match.endswith(m)):
                        tr.append(m)

            for r in tr:
                if r in matches:
                    matches.remove(r)

        return matches

    def include_modules_from_test_targets(self, matches):
        """Map test targets to the module files"""
        new_matches = []
        for match in matches:
            if not match:
                continue
            # include modules from test targets
            if 'test/integration/targets' in match:
                paths = match.split('/')
                tindex = paths.index('targets')
                mname = paths[tindex+1]
                mrs = self.find_module_match(mname, exact=True)
                if mrs:
                    if not isinstance(mrs, list):
                        mrs = [mrs]
                    for mr in mrs:
                        new_matches.append(mr['repo_filename'])
        return new_matches

    def get_meta_for_file(self, filename):
        meta = {
            'repo_filename': filename,
            'name': os.path.basename(filename).split('.')[0],
            'notify': [],
            'assign': [],
            'authors': [],
            'committers': [],
            'maintainers': [],
            'labels': [],
            'ignore': [],
            'support': None,
            'supported_by': None,
            'deprecated': False,
            'topic': None,
            'subtopic': None,
            'namespace': None,
            'namespace_maintainers': []
        }

        populated = False
        filenames = [filename, os.path.splitext(filename)[0]]

        # powershell meta is in the python file
        if filename.endswith('.ps1'):
            pyfile = filename.replace('.ps1', '.py')
            if pyfile in self.BOTMETA['files']:
                filenames.append(pyfile)

        botmeta_entries = self.file_indexer._filenames_to_keys(filenames)
        for entry in botmeta_entries:
            fdata = self.BOTMETA['files'][entry].copy()

            if 'authors' in fdata:
                meta['authors'] = fdata['authors']
            if 'maintainers' in fdata:
                meta['notify'] += fdata['maintainers']
                meta['assign'] += fdata['maintainers']
                meta['maintainers'] += fdata['maintainers']
            if 'notified' in fdata:
                meta['notify'] += fdata['notified']
            if 'labels' in fdata:
                meta['labels'] += fdata['labels']
            if 'ignore' in fdata:
                meta['ignore'] += fdata['ignore']
            if 'ignored' in fdata:
                meta['ignore'] += fdata['ignored']
            if 'support' in fdata:
                if isinstance(fdata['support'], list):
                    meta['support'] = fdata['support'][0]
                else:
                    meta['support'] = fdata['support']
            elif 'supported_by' in fdata:
                if isinstance(fdata['supported_by'], list):
                    meta['support'] = fdata['supported_by'][0]
                else:
                    meta['support'] = fdata['supported_by']

            if 'deprecated' in fdata:
                meta['deprecated'] = fdata['deprecated']

            populated = True

        # walk up the tree for more meta
        paths = filename.split('/')
        for idx, x in enumerate(paths):
            thispath = '/'.join(paths[:(0-idx)])
            if thispath in self.BOTMETA['files']:
                fdata = self.BOTMETA['files'][thispath].copy()
                if 'support' in fdata and not meta['support']:
                    if isinstance(fdata['support'], list):
                        meta['support'] = fdata['support'][0]
                    else:
                        meta['support'] = fdata['support']
                if 'labels' in fdata:
                    meta['labels'] += fdata['labels']
                if 'maintainers' in fdata:
                    meta['notify'] += fdata['maintainers']
                    meta['assign'] += fdata['maintainers']
                    meta['maintainers'] += fdata['maintainers']
                if 'ignore' in fdata:
                    meta['ignore'] += fdata['ignore']
                if 'notified' in fdata:
                    meta['notify'] += fdata['notified']

        if 'lib/ansible/modules' in filename:
            topics = [x for x in paths if x not in ['lib', 'ansible', 'modules']]
            topics = [x for x in topics if x != os.path.basename(filename)]
            if len(topics) == 2:
                meta['topic'] = topics[0]
                meta['subtopic'] = topics[1]
            elif len(topics) == 1:
                meta['topic'] = topics[0]

            meta['namespace'] = '/'.join(topics)

        # set namespace maintainers (skip !modules for now)
        if filename.startswith('lib/ansible/modules'):
            ns = meta.get('namespace')
            keys = self.BOTMETA['files'].keys()
            keys = [x for x in keys if x.startswith(os.path.join('lib/ansible/modules', ns))]
            ignored = []

            for key in keys:
                meta['namespace_maintainers'] += self.BOTMETA['files'][key].get('maintainers', [])
                ignored += self.BOTMETA['files'][key].get('ignored', [])

            for ignoree in ignored:
                while ignoree in meta['namespace_maintainers']:
                    meta['namespace_maintainers'].remove(ignoree)

        # new modules should default to "community" support
        if filename.startswith('lib/ansible/modules') and filename not in self.gitrepo.files:
            meta['support'] = 'community'
            meta['supported_by'] = 'community'

        # test targets for modules should inherit from their modules
        if filename.startswith('test/integration/targets') and filename not in self.BOTMETA['files']:
            whitelist = [
                'labels', 'ignore', 'deprecated', 'authors',
                'assign', 'maintainers', 'notify', 'topic',
                'subtopic', 'support'
            ]

            paths = filename.split('/')
            tindex = paths.index('targets')
            mname = paths[tindex+1]
            mmatch = self._find_module_match(mname, exact=True)
            if mmatch:
                mmeta = self.get_meta_for_file(mmatch[0]['repo_filename'])
                for k, v in mmeta.items():
                    if k in whitelist and v:
                        if isinstance(meta[k], list):
                            meta[k] = sorted(set(meta[k] + v))
                        elif not meta[k]:
                            meta[k] = v

            # make new test targets community by default
            if not meta['support'] and not meta['supported_by']:
                meta['support'] = 'community'

        # it's okay to remove things from legacy-files.txt
        if filename == 'test/sanity/pep8/legacy-files.txt' and not meta['support']:
            meta['support'] = 'community'

        # fallback to core support
        if not meta['support']:
            meta['support'] = 'core'

        # align support and supported_by
        if meta['support'] != meta['supported_by']:
            if meta['support'] and not meta['supported_by']:
                meta['supported_by'] = meta['support']
            elif not meta['support'] and meta['supported_by']:
                meta['support'] = meta['supported_by']

        # clean up the result
        _meta = meta.copy()
        for k, v in _meta.items():
            if isinstance(v, list):
                meta[k] = sorted(set(v))

        # walk up the botmeta tree looking for ignores to include
        if meta.get('repo_filename'):
            namespace_paths = os.path.dirname(meta['repo_filename'])
            namespace_paths = namespace_paths.split('/')
            for x in reversed(range(0, len(namespace_paths) + 1)):
                this_ns_path = '/'.join(namespace_paths[:x])
                if not this_ns_path:
                    continue
                logging.debug('check {}'.format(this_ns_path))
                if this_ns_path in self.BOTMETA['files']:
                    this_ignore = self.BOTMETA['files'][this_ns_path].get('ignore') or \
                        self.BOTMETA['files'][this_ns_path].get('ignored') or \
                        self.BOTMETA['files'][this_ns_path].get('ignores')
                    logging.debug('ignored: {}'.format(this_ignore))
                    if this_ignore:
                        for username in this_ignore:
                            if username not in meta['ignore']:
                                meta['ignore'].append(username)

        # process ignores AGAIN.
        if meta.get('ignore'):
            for k, v in meta.items():
                if k == 'ignore':
                    continue
                if not isinstance(v, list):
                    continue
                for ignoree in meta['ignore']:
                    if ignoree in v:
                        meta[k].remove(ignoree)

        return meta

    def find_module_match(self, pattern, exact=False):
        '''Exact module name matching'''
        logging.debug('find_module_match for "{}"'.format(pattern))

        candidate = None

        BLACKLIST = [
            'module_utils',
            'callback',
            'network modules',
            'networking modules',
            'windows modules'
        ]

        if not pattern:
            return None

        # https://github.com/ansible/ansible/issues/19755
        if pattern == 'setup':
            pattern = 'lib/ansible/modules/system/setup.py'

        if '/facts.py' in pattern or ' facts.py' in pattern:
            pattern = 'lib/ansible/modules/system/setup.py'

        # https://github.com/ansible/ansible/issues/18527
        #   docker-container -> docker_container
        if '-' in pattern:
            pattern = pattern.replace('-', '_')

        if 'module_utils' in pattern:
            # https://github.com/ansible/ansible/issues/20368
            return None
        elif 'callback' in pattern:
            return None
        elif 'lookup' in pattern:
            return None
        elif 'contrib' in pattern and 'inventory' in pattern:
            return None
        elif pattern.lower() in BLACKLIST:
            return None

        candidate = self._find_module_match(pattern, exact=exact)

        if not candidate:
            candidate = self._find_module_match(os.path.basename(pattern))

        if not candidate and '/' in pattern and not pattern.startswith('lib/'):
            ppy = None
            ps1 = None
            if not pattern.endswith('.py') and not pattern.endswith('.ps1'):
                ppy = pattern + '.py'
            if not pattern.endswith('.py') and not pattern.endswith('.ps1'):
                ps1 = pattern + '.ps1'

            for mf in self.gitrepo.module_files:
                if pattern in mf:
                    if mf.endswith(pattern) or mf.endswith(ppy) or mf.endswith(ps1):
                        candidate = mf
                        break

        return candidate

    def _find_module_match(self, pattern, exact=False):
        logging.debug('matching on {}'.format(pattern))

        matches = []

        if isinstance(pattern, unicode):
            pattern = pattern.encode('ascii', 'ignore')

        logging.debug('_find_module_match: {}'.format(pattern))

        noext = pattern.replace('.py', '').replace('.ps1', '')

        # exact is looking for a very precise name such as "vmware_guest"
        if exact:
            candidates = [pattern]
        else:
            candidates = [pattern, '_' + pattern, noext, '_' + noext]

        for k, v in self.MODULES.items():
            if v['name'] in candidates:
                logging.debug('match {} on name: {}'.format(k, v['name']))
                matches = [v]
                break

        if not matches:
            # search by key ... aka the filepath
            for k, v in self.MODULES.items():
                if k == pattern:
                    logging.debug('match {} on key: {}'.format(k, k))
                    matches = [v]
                    break

        # spellcheck
        if not exact and not matches and '/' not in pattern:
            _pattern = pattern
            if not isinstance(_pattern, unicode):
                _pattern = _pattern.decode('utf-8')
            candidates = []
            for k, v in self.MODULES.items():
                vname = v['name']
                if not isinstance(vname, unicode):
                    vname = vname.decode('utf-8')
                jw = jaro_winkler(vname, _pattern)
                if jw > .9:
                    candidates.append((jw, k))
            for candidate in candidates:
                matches.append(self.MODULES[candidate[1]])

        return matches
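# A minimal usage sketch (not from the project): match_components() is the
# public matching entrypoint shown above. It takes an issue title, body, and
# the raw "component" template field, and returns one metadata dict per
# matched file (built by get_meta_for_file). The cachedir value and the issue
# text are hypothetical.
cm = AnsibleComponentMatcher(cachedir='/tmp/acm.cache')
matches = cm.match_components(
    'copy module ignores mode',            # issue title
    'SUMMARY\nThe copy module does ...',   # issue body
    'copy module'                          # component_raw from the template
)
for m in matches:
    print(m['repo_filename'], m['maintainers'])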
def main():
    pprint(sys.argv)
    dest = sys.argv[1]
    print('dest: %s' % dest)

    # get_valid_labels('ansible/ansible')
    # /home/jtanner/.ansibullbot/cache/ansible/ansible/labels.pickle
    with open(os.path.expanduser('~/.ansibullbot/cache/ansible/ansible/labels.pickle'), 'rb') as f:
        labels = pickle.load(f)
    valid_labels = [x.name for x in labels[1]]

    FILEMAP_FILENAME = 'FILEMAP.json'
    COMPONENTMAP_FILENAME = 'COMPONENTMAP.json'

    FI = FileIndexer(
        checkoutdir=os.path.expanduser(
            '~/.ansibullbot/cache/ansible.files.checkout'
        ),
        cmap=COMPONENTMAP_FILENAME,
    )

    module_cache_file = '/tmp/mi-modules.json'
    if not os.path.isfile(module_cache_file):
        module_maintainers = get_maintainers_mapping()
        MI = ModuleIndexer(maintainers=module_maintainers)
        MI.get_ansible_modules()
        with open(module_cache_file, 'wb') as f:
            f.write(json.dumps(MI.modules, sort_keys=True, indent=2))
        modules = MI.modules
    else:
        with open(module_cache_file, 'rb') as f:
            modules = json.loads(f.read())

    macro_teams = {
        'Qalthos,gundalow,privateip': 'openswitch',
        'Qalthos,ganeshrn,gundalow,privateip,rcarrillocruz,trishnaguha': 'networking',
        'GGabriele,jedelman8,mikewiebe,privateip,rahushen,rcarrillocruz,trishnaguha': 'nxos',
        'emonty,j2sol,juliakreger,rcarrillocruz,shrews,thingee': 'openstack',
        'chrishoffman,manuel-sousa,romanek-adam': 'rabbitmq',
        'alikins,barnabycourt,flossware,vritant': 'rhn',
        'Qalthos,amitsi,gundalow,privateip': 'netvisor',
        'haroldwongms,nitzmahone,tstringer': 'azure',
        'dagwieers,jborean93,jhawkesworth': 'windows',
        'dagwieers,dav1x,jctanner': 'vmware',
        'isharacomix,jrrivers,privateip': 'cumulus',
        'chiradeep,giorgos-nikolopoulos': 'netscaler',
        'ericsysmin,grastogi23,khaltore': 'avi',
        'ghjm,jlaska,matburt,wwitzel3': 'tower',
        'hulquest,lmprice,timuster': 'netapp',
    }

    usermap = {
        'mpdehaan': False
    }
    namemap = {
        'Shrews': 'shrews'
    }
    exclusions = {
        '*': ['chouseknecht', 'Java1Guy', 'franckcuny', 'mhite', 'bennojoy',
              'risaacson', 'whenrik'],
        'network/wakeonlan': ['dagwiers'],
    }

    removed = get_removed_maintainers()

    teams = {}
    data = {}
    data['files'] = {}

    # merge the moduleindexer data
    for k, v in modules.items():
        fp = v.get('filepath')
        if not fp or not fp.startswith('lib/ansible'):
            continue
        data['files'][k] = {}
        if v['_maintainers']:
            data['files'][k]['maintainers'] = [x for x in v['_maintainers']]
        if v['authors']:
            if 'maintainers' not in data['files'][k]:
                data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] += v['authors']
            data['files'][k]['maintainers'] = sorted(set(data['files'][k]['maintainers']))

        # validate each maintainer exists
        if 'maintainers' in data['files'][k]:
            maintainers = []
            for x in data['files'][k]['maintainers']:
                if x in exclusions['*']:
                    continue
                if x in namemap:
                    x = namemap[x]
                if x in usermap:
                    if usermap[x]:
                        maintainers.append(x)
                else:
                    if x == 'ansible':
                        usermap['ansible'] = True
                        maintainers.append(x)
                        continue
                    res = requests.get('https://github.com/%s' % x)
                    if res.status_code == 200:
                        usermap[x] = True
                        maintainers.append(x)
                    else:
                        usermap[x] = False
            data['files'][k]['maintainers'] = sorted(set(maintainers))
            if not data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)

    # merge the removed people
    for k, v in removed.items():
        k = os.path.join('lib/ansible/modules', k)
        v = sorted(set(v))
        if k in data['files']:
            if 'maintainers' in data['files'][k]:
                for vx in v:
                    if vx in data['files'][k]['maintainers']:
                        data['files'][k]['maintainers'].remove(vx)
                        if 'ignored' not in data['files'][k]:
                            data['files'][k]['ignored'] = []
                        data['files'][k]['ignored'].append(vx)
                if not data['files'][k]['maintainers']:
                    data['files'][k].pop('maintainers', None)
                    #import epdb; epdb.st()

    # merge the fileindexer data
    for k in FI.files:
        #if 'contrib/inventory' in k:
        #    import epdb; epdb.st()
        #print(k)
        try:
            klabels = FI.get_component_labels(valid_labels, [k])
            if klabels:
                klabels = [x for x in klabels if not x.startswith('c:')]
                if not klabels:
                    continue
                if k not in data['files']:
                    data['files'][k] = {}
                if 'labels' not in data['files'][k]:
                    data['files'][k]['labels'] = []
                data['files'][k]['labels'] += klabels
        except UnicodeDecodeError:
            continue

        keywords = FI.get_keywords_for_file(k)
        if keywords:
            if k not in data['files']:
                data['files'][k] = {}
            if 'keywords' not in data['files'][k]:
                data['files'][k]['keywords'] = []
            data['files'][k]['keywords'] += keywords
            #import epdb; epdb.st()

    '''
    # calculate all teams
    for k,v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = sorted(set(v['maintainers']))
        key = ','.join(maintainers)
        if key not in teams:
            teams[key] = []
        teams[key].append(k)

    # rank and show
    steams = sorted(teams, key=len, reverse=True)
    for x in steams[0:15]:
        if x in macro_teams:
            continue
        pprint(teams[x])
        print(x)
        import epdb; epdb.st()
    import epdb; epdb.st()
    '''

    for k, v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = v.get('maintainers')
        for idx, x in enumerate(maintainers):
            if x == 'ansible':
                maintainers[idx] = '$team_ansible'
        if maintainers == ['$team_ansible']:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        if len(maintainers) == 1:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue

        mkey = ','.join(sorted(set(maintainers)))
        if mkey in macro_teams:
            maintainers = ['$team_%s' % macro_teams[mkey]]
            data['files'][k]['maintainers'] = ' '.join(maintainers)
        else:
            # partial matching
            match = None
            subnames = sorted(set(maintainers))
            for sn in subnames:
                filtered = [x for x in subnames if x != sn]
                fkey = ','.join(filtered)
                if fkey in macro_teams:
                    match = fkey
            if match:
                to_clear = match.split(',')
                maintainers = [x for x in maintainers if x not in to_clear]
                data['files'][k]['maintainers'] = ' '.join(maintainers)

    # fix deprecations
    safe_names = [x for x in FI.files if all(c in string.printable for c in x)]
    remove = []
    for k, v in data['files'].items():
        maintainers = v.get('maintainers')
        if maintainers:
            if 'DEPRECATED' in data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)
                data['files'][k]['deprecated'] = True
        bn = os.path.basename(k)
        if bn.startswith('_') and bn != '__init__.py' and '/modules/' in k:
            '''
            data['files'][k]['deprecated'] = True
            if 'maintainers' in data['files'][k]:
                data['files'][k].pop('maintainers', None)
            '''
            remove.append(k)

        # get rid of files no longer in the repo
        if k not in safe_names:
            remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    # remove any keys where maintainers == authors
    remove = []
    for k, v in data['files'].items():
        if v.keys() != ['maintainers']:
            continue
        if v['maintainers'] != modules[k]['authors']:
            continue
        remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    #####################################
    # add special notifies
    #####################################
    data['files']['lib/ansible/modules/cloud/amazon/'] = {
        'notify': ['willthames']
    }

    #####################################
    # reduce to namespace maintainers
    #####################################
    groups = {}
    for k, v in data['files'].items():
        dn = os.path.dirname(k)
        if dn not in groups:
            groups[dn] = {
                'matches': [],
                'values': []
            }
        groups[dn]['matches'].append(k)
        if v not in groups[dn]['values']:
            groups[dn]['values'].append(v)
    for k, v in groups.items():
        if not len(v['values']) == 1:
            continue
        if len(v['matches']) == 1:
            continue
        #print(k)
        #pprint(v)
        newk = k + '/'
        data['files'][newk] = v['values'][0]
        for pf in v['matches']:
            data['files'].pop(pf, None)
        if newk in removed:
            import epdb; epdb.st()

    #####################################
    # make a sorted dict
    #####################################
    files = data['files']
    data['files'] = OrderedDict()
    fkeys = sorted(files.keys())
    fkeys = [x.replace('lib/ansible/modules', '$modules') for x in fkeys]
    fkeys = sorted(set(fkeys))
    for fkey in fkeys:
        if fkey.startswith('$modules'):
            mkey = fkey.replace('$modules', 'lib/ansible/modules')
            data['files'][fkey] = files[mkey]
        else:
            data['files'][fkey] = files[fkey]

    data['macros'] = OrderedDict()
    data['macros']['modules'] = 'lib/ansible/modules'

    # invert macro_teams: team name -> comma-joined member key
    macro_dict = {}
    for key, team in macro_teams.items():
        macro_dict[team] = key

    data['macros']['team_ansible'] = []
    for k in sorted(macro_dict.keys()):
        team = macro_dict[k].split(',')
        if len(team) < 10:
            team = " ".join(team)
        data['macros']['team_%s' % k] = team

    # if maintainers is the only subkey, make the primary value a string
    for k, v in data['files'].items():
        keys = v.keys()
        if keys == ['maintainers']:
            if isinstance(v['maintainers'], list):
                data['files'][k] = " ".join(v['maintainers'])
            else:
                data['files'][k] = v['maintainers']
        for xk in ['ignored', 'notified', 'maintainers']:
            if xk in data['files'][k]:
                if not isinstance(data['files'][k][xk], (str, unicode)):
                    data['files'][k][xk] = " ".join(data['files'][k][xk])

    # write it once with ryaml to make it ordered
    ryaml = rYAML()
    (fo, fn) = tempfile.mkstemp()
    with open(fn, 'wb') as f:
        ryaml.dump(data, f)

    # read it back in
    with open(fn, 'rb') as f:
        ylines = f.readlines()

    for idx, x in enumerate(ylines):
        x = x.rstrip()
        x = x.replace('!!omap', '')
        if x.endswith(' {}'):
            x = x.replace(' {}', '')
        if x.startswith('-'):
            x = x.replace('-', ' ', 1)
        ylines[idx] = x

        # fold wrapped scalar continuations back onto the previous line
        if x.startswith(' ') and ':' not in x and '-' not in x:
            ylines[idx-1] += ' ' + x.strip()
            ylines[idx] = ''

    ylines = [x for x in ylines if x.strip()]
    ylines = [HEADER] + ylines

    with open(dest, 'wb') as f:
        f.write('\n'.join(ylines))
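# --- invocation sketch (illustrative; not part of the original source) ---
# main() takes the destination path as argv[1], dumps the ordered data with
# ruamel, and then rewrites the dump into the flat BOTMETA layout. The script
# name below is assumed:
#
#   $ python botmeta_generator.py /tmp/BOTMETA.yml
#
# Given the transforms above ('$modules' substitution, maintainer lists
# joined into space-separated strings, 'team_*' macros), the emitted file
# should roughly have this shape (entries are illustrative):
#
#   files:
#     $modules/cloud/amazon/:
#       notify: willthames
#     $modules/system/setup.py: maintainer1 maintainer2
#   macros:
#     modules: lib/ansible/modules
#     team_ansible: []
#     team_avi: ericsysmin grastogi23 khaltore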
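# --- standalone sketch of the team-macro reduction above (illustrative) ---
# Collapses a maintainer list to a '$team_*' macro when the sorted,
# comma-joined set matches a macro_teams key, with the same drop-one-name
# partial-match fallback used in main(); on a partial match the covered
# names are dropped without adding the macro, mirroring the code above.
# The sample team map in the usage comments is made up.

def reduce_to_macros(maintainers, macro_teams):
    mkey = ','.join(sorted(set(maintainers)))
    if mkey in macro_teams:
        return ['$team_%s' % macro_teams[mkey]]
    # partial match: try removing one name at a time
    match = None
    subnames = sorted(set(maintainers))
    for sn in subnames:
        fkey = ','.join([x for x in subnames if x != sn])
        if fkey in macro_teams:
            match = fkey
    if match:
        to_clear = match.split(',')
        return [x for x in maintainers if x not in to_clear]
    return maintainers

# e.g. with a made-up team map:
#   reduce_to_macros(['alice', 'bob'], {'alice,bob': 'demo'})          -> ['$team_demo']
#   reduce_to_macros(['alice', 'bob', 'carol'], {'alice,bob': 'demo'}) -> ['carol']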