@classmethod
def setUpClass(cls):
    """Init the matcher"""
    cachedir = tempfile.mkdtemp()
    gitrepo = GitShallowRepo(cachedir=cachedir, repo=ComponentMatcher.REPO)
    gitrepo.update()

    file_indexer = FileIndexer(gitrepo=gitrepo)
    file_indexer.get_files()
    file_indexer.parse_metadata()

    cls.component_matcher = ComponentMatcher(
        email_cache={},
        gitrepo=gitrepo,
        file_indexer=file_indexer
    )
def get_file_indexer(m_manage_checkout, m_checkoutdir):
    indexer = FileIndexer()
    indexer.get_files()
    indexer.parse_metadata()
    return indexer
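# A hypothetical test wiring for the fixture above, inferred from the m_*
# parameter names: patch out checkout management so FileIndexer() never
# touches a real git clone. The patched attribute names are assumptions;
# mock.patch injects mocks bottom-up, so the innermost decorator's mock
# arrives as the first argument.
import mock  # unittest.mock on py3


@mock.patch.object(FileIndexer, 'manage_checkout', create=True)
@mock.patch.object(FileIndexer, 'checkoutdir', create=True)
def test_file_indexer_metadata(m_checkoutdir, m_manage_checkout):
    indexer = get_file_indexer(m_manage_checkout, m_checkoutdir)
    assert indexer is not None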
import json
import os
import sys
from pprint import pprint

# set_logger, extract_metafiles, load_skip/load_expected/load_match_map and
# their save_* counterparts, IssueWrapperMock, get_component_match_facts,
# clean_metafiles, CACHEDIR, FIXTUREDIR, LABELS, C (constants), FileIndexer,
# ModuleIndexer, GithubGraphQLClient and AnsibleComponentMatcher are assumed
# to be defined or imported elsewhere in this script's module.


def main():
    set_logger()

    METAFILES = extract_metafiles()
    SKIP = load_skip()
    EXPECTED = load_expected()
    MATCH_MAP = load_match_map()

    ERRORS = []
    ERRORS_COMPONENTS = []

    start_at = None
    if len(sys.argv) == 2:
        start_at = int(sys.argv[1])

    FI = FileIndexer(checkoutdir=CACHEDIR)
    with open('/tmp/files.json', 'wb') as f:
        f.write(json.dumps(FI.files, indent=2))

    GQLC = GithubGraphQLClient(C.DEFAULT_GITHUB_TOKEN)
    MI = ModuleIndexer(cachedir=CACHEDIR, gh_client=GQLC, blames=False, commits=False)
    CM = AnsibleComponentMatcher(cachedir=CACHEDIR)

    for k, v in MI.modules.items():
        if k in MATCH_MAP:
            MATCH_MAP.pop(k, None)
        kname = v.get('name')
        if kname not in MATCH_MAP:
            MATCH_MAP[kname] = v.get('repo_filename')
        if kname + ' module' not in MATCH_MAP:
            MATCH_MAP[kname + ' module'] = v.get('repo_filename')
        if 'module: ' + kname not in MATCH_MAP:
            MATCH_MAP['module: ' + kname] = v.get('repo_filename')
        if 'module ' + kname not in MATCH_MAP:
            MATCH_MAP['module ' + kname] = v.get('repo_filename')

        # /modules/remote_management/foreman/katello.py
        pname = k.replace('lib/ansible', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # /ansible/modules/packaging/os/rpm_key.py
        pname = k.replace('lib/', '/')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # ansible/modules/packaging/os/rpm_key.py
        pname = k.replace('lib/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # ansible/lib/ansible/modules/monitoring/monit.py
        pname = 'ansible/' + k
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # network/f5/bigip_gtm_wide_ip
        pname = k.replace('lib/ansible/modules/', '')
        pname = pname.replace('.py', '')
        pname = pname.replace('.ps1', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # network/f5/bigip_gtm_wide_ip.py
        pname = k.replace('lib/ansible/modules/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')
        # modules/packaging/os/pkgng.py
        pname = k.replace('lib/ansible/', '')
        if pname not in MATCH_MAP:
            MATCH_MAP[pname] = v.get('repo_filename')

    save_match_map(MATCH_MAP)

    total = len(METAFILES)
    for IDMF, MF in enumerate(METAFILES):
        if start_at and IDMF < start_at:
            continue

        with open(MF, 'rb') as f:
            meta = json.loads(f.read())

        if not meta.get('is_issue'):
            continue

        component = meta.get('template_data', {}).get('component_raw')
        #if component != 'Module `synchronize`':
        #if component != 'Module: include_role':
        #    continue

        if component:
            print('------------------------------------------ {}|{}'.format(total, IDMF))
            print(meta['html_url'])
            print(meta['title'])
            print(component)

            hurl = meta['html_url']
            if hurl in SKIP:
                continue

            # bad template or bad template parsing
            if len(component) > 100:
                continue

            iw = IssueWrapperMock(meta)
            if 'module' not in iw.body.lower() and 'module' not in iw.title.lower():
                continue

            expected_fns = []

            # OLD METHOD
            if hurl not in EXPECTED and component not in MATCH_MAP:
                cmf = get_component_match_facts(iw, meta, FI, MI, LABELS)
                expected_fns = cmf.get('module_match')
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
                expected_fns = [x['repo_filename'] for x in expected_fns if x]
                if 'component_matches' in cmf:
                    expected_fns = [x['filename'] for x in cmf['component_matches']]
                expected_fns = sorted(set(expected_fns))

            # NEW METHOD
            cmr = CM.match_components(iw.title, iw.body, iw.template_data.get('component_raw'))
            cmr_fns = [x['repo_filename'] for x in cmr if x]
            cmr_fns = sorted(set(cmr_fns))

            # VALIDATE FROM EXPECTED IF KNOWN
            if hurl in EXPECTED:
                if EXPECTED[hurl] and not isinstance(EXPECTED[hurl], list):
                    expected_fns = [EXPECTED[hurl]]
                elif EXPECTED[hurl]:
                    expected_fns = EXPECTED[hurl]
                else:
                    expected_fns = []

            # USE THE CACHED MAP
            if component in MATCH_MAP:
                expected_fns = MATCH_MAP[component]
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
            elif component.lower() in MATCH_MAP:
                expected_fns = MATCH_MAP[component.lower()]
                if not isinstance(expected_fns, list):
                    expected_fns = [expected_fns]
            elif component.startswith(':\n') and component.endswith(' module'):
                mapkey = component.lstrip(':\n')
                if mapkey in MATCH_MAP:
                    expected_fns = MATCH_MAP[mapkey]
                    if not isinstance(expected_fns, list):
                        expected_fns = [expected_fns]

            # OLD CODE USED ACTION PLUGINS INSTEAD OF MODULES
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if len(expected_fns) == 1 and len(cmr_fns) == 1 and 'plugins/action' in expected_fns[0]:
                    e_bn = os.path.basename(expected_fns[0])
                    c_bn = os.path.basename(cmr_fns[0])
                    if e_bn == c_bn:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue

            # DOCS URLS
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if len(cmr_fns) == 1 and 'lib/ansible/modules' in cmr_fns[0]:
                    c_bn = os.path.basename(cmr_fns[0])
                    if 'docs.ansible.com/ansible/latest/{}_module.html'.format(c_bn) in component:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue
                elif CM.strategy in ['search_by_regex_urls']:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # NXOS ISSUES HAVE NXOS_VERSION HEADER
            if '- nxos' in component:
                if len(cmr_fns) == 1:
                    if os.path.basename(cmr_fns[0]).replace('.py', '') in component:
                        MATCH_MAP[component] = cmr_fns
                        save_match_map(MATCH_MAP)
                        continue
                #import epdb; epdb.st()

            # ODDBALL MODULE COMPONENTS
            if len(cmr_fns) == 1 and 'lib/ansible/modules' in cmr_fns[0]:
                bn = os.path.basename(cmr_fns[0])
                bn = bn.replace('.py', '')
                bn = bn.replace('.ps1', '')
                if (bn in component or bn.lstrip('_') in component) and 'module' in component.lower():
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == '- ' + bn:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == bn + '.py' or component == bn + '.ps1':
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == '_' + bn + '.py' or component == '_' + bn + '.ps1':
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component == ':\n' + bn or component == ':\n' + bn.lstrip('_'):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # 'multiple modules', etc ...
            if component in CM.KEYWORDS or component.lower() in CM.KEYWORDS:
                if component in CM.KEYWORDS and CM.KEYWORDS[component] is None and not cmr_fns:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif component.lower() in CM.KEYWORDS and CM.KEYWORDS[component.lower()] is None and not cmr_fns:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif len(cmr_fns) == 1 and cmr_fns[0] == CM.KEYWORDS.get(component):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue
                elif len(cmr_fns) == 1 and cmr_fns[0] == CM.KEYWORDS.get(component.lower()):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.lstrip('-').strip() in CM.KEYWORDS and len(cmr_fns) == 1:
                cname = component.lstrip('-').strip()
                if CM.KEYWORDS[cname] == cmr_fns[0]:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.endswith(' lookup') and len(cmr_fns) == 1 and 'lib/ansible/plugins/lookup' in cmr_fns[0]:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if component.endswith(' inventory script') and len(cmr_fns) == 1 and 'contrib/inventory' in cmr_fns[0]:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if component.startswith('ansible/lib') and len(cmr_fns) == 1:
                fn = cmr_fns[0]
                if 'ansible/' + fn == component:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component.endswith(' inventory plugin') and len(cmr_fns) == 1:
                fn = cmr_fns[0]
                if fn.startswith('lib/ansible/plugins/inventory'):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            if component == 'ec2.py' and cmr_fns and 'contrib/inventory/ec2.py' in cmr_fns:
                MATCH_MAP[component] = cmr_fns
                save_match_map(MATCH_MAP)
                continue

            if len(expected_fns) == 1 and len(cmr_fns) == 1:
                if os.path.basename(expected_fns[0]) == os.path.basename(cmr_fns[0]):
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                    continue

            # COMPARE AND RECORD
            if expected_fns != cmr_fns and hurl not in EXPECTED:
                if component in MATCH_MAP or component.lower() in MATCH_MAP:
                    if component.lower() in MATCH_MAP:
                        mmc = MATCH_MAP[component.lower()]
                    else:
                        mmc = MATCH_MAP[component]
                    if not isinstance(mmc, list):
                        mmc = [mmc]
                    if mmc == cmr_fns:
                        EXPECTED[iw.html_url] = cmr_fns
                        save_expected(EXPECTED)
                        continue

                print('## COMPONENT ...')
                print(component)
                print('## EXPECTED ...')
                pprint(expected_fns)
                print('## RESULT ...')
                pprint(cmr_fns)
                print('## STRATEGIES ..')
                pprint(CM.strategy)
                pprint(CM.strategies)
                print('--------------------------------')
                res = raw_input('Is the result correct? (y/n/s/d): ')
                if res.lower() in ['y', 'yes']:
                    MATCH_MAP[component] = cmr_fns
                    EXPECTED[iw.html_url] = cmr_fns
                    save_expected(EXPECTED)
                    continue
                elif res.lower() in ['s', 'skip']:
                    SKIP.append(hurl)
                    save_skip(SKIP)
                    continue
                elif res.lower() in ['d', 'debug']:
                    import epdb; epdb.st()

                ERRORS.append(iw.html_url)
                ERRORS_COMPONENTS.append({
                    'url': iw.html_url,
                    'component': component,
                    'component_raw': iw.template_data.get('component_raw'),
                    'result': cmr_fns,
                    'expected': expected_fns,
                    'strategy': CM.strategy,
                    'strategies': CM.strategies
                })
            else:
                if component not in MATCH_MAP:
                    MATCH_MAP[component] = cmr_fns
                    save_match_map(MATCH_MAP)
                if hurl not in EXPECTED:
                    EXPECTED[hurl] = cmr_fns
                    save_expected(EXPECTED)
                continue

    pprint(ERRORS)
    fn = os.path.join(FIXTUREDIR, 'component_errors.json')
    with open(fn, 'wb') as f:
        f.write(json.dumps(ERRORS_COMPONENTS, indent=2, sort_keys=True))

    clean_metafiles(METAFILES)
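# Assumed entrypoint guard (not shown in the original snippet). The optional
# argv[1] is the metafile index to resume from, per "start_at" above; the
# script name here is hypothetical.
#
#   $ python match_map_tool.py          # full run
#   $ python match_map_tool.py 500      # resume at metafile 500
#
if __name__ == '__main__':
    main()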
import logging
import os
from pprint import pprint

# DefaultTriager, DefaultActions, FileIndexer and IssueWrapper are assumed
# imports from the surrounding project; UnknownObjectException comes from
# PyGithub.


class SimpleTriager(DefaultTriager):

    def __init__(self):
        super(SimpleTriager, self).__init__()

        # get valid labels
        logging.info('getting labels')
        self.valid_labels = self.get_valid_labels(self.repo)

    @classmethod
    def create_parser(cls):
        parser = DefaultTriager.create_parser()
        parser.description = "Triage issue and pullrequest queues for any github repo.\n" \
                             " (NOTE: only useful if you have commit access to" \
                             " the repo in question.)"
        parser.add_argument("--pr", "--id", type=str, dest="number",
                            help="Triage only the specified pr|issue (separated by commas)")
        parser.add_argument("--repo", "-r", type=str, required=True,
                            help="Github repo to triage (defaults to all)")
        return parser

    def run(self):
        # create the fileindexer
        fi_cache = '/tmp/ansibullbot/cache/{}.files.checkout'.format(self.repo)
        fi_cache = os.path.expanduser(fi_cache)
        self.file_indexer = FileIndexer(botmetafile=self.botmetafile,
                                        checkoutdir=fi_cache,
                                        repo=self.repo)
        self.file_indexer.update()

        # make a repo object for the github api
        repo = self.ghw.get_repo(self.repo)

        # map for issue type to label
        try:
            label_map = repo.get_label_map()
        except UnknownObjectException:
            label_map = {}

        # collect issues
        if not self.number:
            issues = repo.get_issues()
        else:
            issue = repo.get_issue(int(self.number))
            issues = [issue]

        # iterate through issues and apply actions
        for issue in issues:
            logging.info('triaging %s' % issue.html_url)
            actions = DefaultActions()

            # wrap the issue for extra magic
            cachedir = os.path.join(self.cachedir_base, self.repo)
            iw = IssueWrapper(github=self.ghw, repo=repo, issue=issue,
                              cachedir=cachedir, file_indexer=self.file_indexer)

            # what did the submitter provide in the body?
            td = iw.template_data
            missing = iw.missing_template_sections
            if missing and 'needs_template' not in iw.labels:
                actions.newlabel.append('needs_template')

            # what type of issue is this?
            if 'issue type' in td:
                mapped_label = label_map.get(td['issue type'])
                if mapped_label:
                    if mapped_label not in iw.labels:
                        actions.newlabel.append(mapped_label)

            pprint(vars(actions))
            self.apply_actions(iw, actions)
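# A minimal, assumed entrypoint sketch: create_parser() implies argparse-driven
# construction, and run() performs one triage pass. How the parsed args reach
# __init__ is internal to DefaultTriager, so this is only a plausible
# invocation, not the project's actual launcher script.
if __name__ == '__main__':
    triager = SimpleTriager()  # parses --repo/--pr via its base class (assumed)
    triager.run()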
import json

import requests

# FileIndexer, extract_template_sections, runscript, get_headers,
# run_template_extract, get_migrated_issue, find_component and render_body
# are assumed to be defined or imported elsewhere in this script's module.


def main():
    # need a file indexer to get the template
    FI = FileIndexer(checkoutdir='/tmp/fileindexer')
    FI.update()

    # get the expected sections
    tf_content = FI.get_file_content('.github/ISSUE_TEMPLATE.md')
    tf_sections = extract_template_sections(tf_content, header='#####')
    required_sections = [x.lower() for x in tf_sections.keys() if tf_sections[x]['required']]
    if not required_sections:
        required_sections = ['issue type', 'component name', 'ansible version', 'summary']
    section_order = list(tf_sections.items())
    section_order = sorted(section_order, key=lambda x: x[1]['index'])
    section_order = [x[0] for x in section_order]

    # all known possibilities
    section_names = ['PLUGIN NAME', 'ANSIBLE CONFIGURATION'] + section_order + ['ENVIRONMENT']

    # get the numbers
    script = "#!/bin/bash\n"
    script += "\n"
    script += "URL='https://github.com/ansible/ansible/issues?utf8=%E2%9C%93&q=is%3Aopen%20label%3Aneeds_template%20author%3Aansibot'\n"
    script += "PYTHONPATH=$(pwd) scripts/scrape_github_issues_url $URL\n"
    (rc, so, se) = runscript(script)
    numbers = json.loads(so)
    numbers = sorted(set(numbers))

    for idn, number in enumerate(numbers):
        print('{} {}|{}'.format(number, idn, len(numbers)))
        fixed = []

        iurl = 'https://api.github.com/repos/ansible/ansible/issues/{}'.format(number)
        irr = requests.get(iurl, headers=get_headers())
        idata = irr.json()

        curl = idata['comments_url']
        crr = requests.get(curl, headers=get_headers())
        comments = crr.json()
        if crr.links:
            print('paginated comments')
            nextp = [x for x in crr.links.items() if x[1]['rel'] == 'next'][0][1]['url']
            while nextp:
                nrr = requests.get(nextp, headers=get_headers())
                comments += nrr.json()
                try:
                    nextp = [x for x in nrr.links.items() if x[1]['rel'] == 'next'][0][1]['url']
                except IndexError:
                    nextp = None
                #import epdb; epdb.st()

        newbody = idata['body']

        # extract
        ts = run_template_extract(FI, newbody, number, 'issue', section_names)

        # cleanup
        if 'environment' in ts:
            ts['os / environment'] = ts['environment']
            ts.pop('environment', None)

        # what is missing?
        missing = [x for x in required_sections if x.lower() not in ts]
        if not missing:
            print('{} nothing missing'.format(number))
            continue

        # simple sed for this one
        if missing == ['component name'] and 'plugin name' in newbody.lower():
            if 'PLUGIN NAME' in newbody:
                newbody = newbody.replace('PLUGIN NAME', 'COMPONENT NAME')
            if 'Plugin Name' in newbody:
                newbody = newbody.replace('Plugin Name', 'Component Name')
            if 'plugin name' in newbody:
                newbody = newbody.replace('plugin name', 'component name')
            print('{} sed/plugin name/component name'.format(number))
            cr = requests.patch(iurl, headers=get_headers(), data=json.dumps({'body': newbody}))
            if cr.status_code != 200:
                print('failed to edit body {}'.format(idata['html_url']))
                import epdb; epdb.st()
            continue

        if 'summary' in missing:
            ts['summary'] = newbody
            missing.remove('summary')
            fixed.append('summary')

        if 'issue type' in missing:
            # get migrated issue
            try:
                mi = get_migrated_issue(idata['body'])
            except Exception as e:
                print(e)
                mi = None
            if mi:
                itype = None
                # get issue type label from migrated issue
                mi_labels = [x['name'] for x in mi['labels']]
                if 'bug_report' in mi_labels:
                    itype = 'Bug Report'
                elif 'feature_idea' in mi_labels:
                    itype = 'Feature Idea'
                elif 'docs_report' in mi_labels:
                    itype = 'Documentation Report'
                if itype is not None:
                    ts['issue type'] = itype
                    missing.remove('issue type')
                    fixed.append('issue type')

        if 'component name' in missing:
            component = find_component(idata, ts, newbody, comments)
            if component:
                missing.remove('component name')
                ts['component name'] = component
                fixed.append('component name')

        if 'ansible version' in missing:
            labels = [x['name'] for x in idata['labels']]
            labels = [x for x in labels if x.startswith('affects_')]
            labels = sorted(set(labels))
            if labels:
                version = labels[0].replace('affects_', '')
            else:
                version = "N/A"
            missing.remove('ansible version')
            ts['ansible version'] = version
            fixed.append('ansible version')

        if not missing:
            print('# {}'.format(idata['html_url']))
            print('# title: {}'.format(idata['title']))
            print('# component: {}'.format(ts['component name']))
            print('# version: {}'.format(ts['ansible version']))
            print('# fixed: {}'.format(fixed))
            newbody = render_body(ts, section_order)
            print('<====================================================>')
            print(newbody)
            print('<====================================================>')
            import epdb; epdb.st()
            cr = requests.patch(iurl, headers=get_headers(), data=json.dumps({'body': newbody}))
            if cr.status_code != 200:
                print('failed to edit body {}'.format(idata['html_url']))
                import epdb; epdb.st()
            continue

        print('no solution(s) for {} {}'.format(idata['html_url'], missing))

    print('DONE')
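# Assumed entrypoint guard (not shown in the original snippet); the script
# takes no arguments and walks every open needs_template issue it scrapes.
if __name__ == '__main__':
    main()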
import datetime
import logging
import os
import re

# Non-stdlib names used below (FileIndexer, GitRepoWrapper, ModuleExtractor,
# BotMetadataParser, run_command, jaro_winkler) are assumed to come from the
# surrounding ansibullbot package; this fragment keeps the original py2-era
# idioms (unicode, str.encode on text).


class AnsibleComponentMatcher(object):

    BOTMETA = {}
    INDEX = {}
    REPO = 'https://github.com/ansible/ansible'
    STOPWORDS = ['ansible', 'core', 'plugin']
    STOPCHARS = ['"', "'", '(', ')', '?', '*', '`', ',', ':', '-']
    BLACKLIST = ['new module', 'new modules']
    FILE_NAMES = []
    MODULES = {}
    MODULE_NAMES = []
    MODULE_NAMESPACE_DIRECTORIES = []

    # FIXME: THESE NEED TO GO INTO BOTMETA
    # ALSO SEE search_by_regex_generic ...
    KEYWORDS = {
        'all': None,
        'ansiballz': 'lib/ansible/executor/module_common.py',
        'ansible-console': 'lib/ansible/cli/console.py',
        'ansible-galaxy': 'lib/ansible/galaxy',
        'ansible-inventory': 'lib/ansible/cli/inventory.py',
        'ansible-playbook': 'lib/ansible/playbook',
        'ansible playbook': 'lib/ansible/playbook',
        'ansible playbooks': 'lib/ansible/playbook',
        'ansible-pull': 'lib/ansible/cli/pull.py',
        'ansible-vault': 'lib/ansible/parsing/vault',
        'ansible-vault edit': 'lib/ansible/parsing/vault',
        'ansible-vault show': 'lib/ansible/parsing/vault',
        'ansible-vault decrypt': 'lib/ansible/parsing/vault',
        'ansible-vault encrypt': 'lib/ansible/parsing/vault',
        'async': 'lib/ansible/modules/utilities/logic/async_wrapper.py',
        'become': 'lib/ansible/playbook/become.py',
        'block': 'lib/ansible/playbook/block.py',
        'blocks': 'lib/ansible/playbook/block.py',
        'callback plugin': 'lib/ansible/plugins/callback',
        'callback plugins': 'lib/ansible/plugins/callback',
        'conditional': 'lib/ansible/playbook/conditional.py',
        'docs': 'docs',
        'delegate_to': 'lib/ansible/playbook/task.py',
        'facts': 'lib/ansible/module_utils/facts',
        'galaxy': 'lib/ansible/galaxy',
        'groupvars': 'lib/ansible/vars/hostvars.py',
        'group vars': 'lib/ansible/vars/hostvars.py',
        'handlers': 'lib/ansible/playbook/handler.py',
        'hostvars': 'lib/ansible/vars/hostvars.py',
        'host vars': 'lib/ansible/vars/hostvars.py',
        'integration tests': 'test/integration',
        'inventory script': 'contrib/inventory',
        'jinja2 template system': 'lib/ansible/template',
        'module_utils': 'lib/ansible/module_utils',
        'multiple modules': None,
        'new module(s) request': None,
        'new modules request': None,
        'new module request': None,
        'new module': None,
        'network_cli': 'lib/ansible/plugins/connection/network_cli.py',
        'network_cli.py': 'lib/ansible/plugins/connection/network_cli.py',
        'network modules': 'lib/ansible/modules/network',
        'paramiko': 'lib/ansible/plugins/connection/paramiko_ssh.py',
        'role': 'lib/ansible/playbook/role',
        'roles': 'lib/ansible/playbook/role',
        'ssh': 'lib/ansible/plugins/connection/ssh.py',
        'ssh authentication': 'lib/ansible/plugins/connection/ssh.py',
        'setup / facts': 'lib/ansible/modules/system/setup.py',
        'setup': 'lib/ansible/modules/system/setup.py',
        'task executor': 'lib/ansible/executor/task_executor.py',
        'testing': 'test/',
        'validate-modules': 'test/sanity/validate-modules',
        'vault': 'lib/ansible/parsing/vault',
        'vault edit': 'lib/ansible/parsing/vault',
        'vault documentation': 'lib/ansible/parsing/vault',
        'with_items': 'lib/ansible/playbook/loop_control.py',
        'windows modules': 'lib/ansible/modules/windows',
        'winrm': 'lib/ansible/plugins/connection/winrm.py'
    }

    def __init__(self, gitrepo=None, botmetafile=None, cachedir=None, email_cache=None, file_indexer=None):
        self.cachedir = cachedir
        self.botmetafile = botmetafile
        self.email_cache = email_cache

        if file_indexer:
            self.file_indexer = file_indexer
        else:
            self.file_indexer = FileIndexer(
                botmetafile=self.botmetafile,
                checkoutdir=self.cachedir
            )

        if gitrepo:
            self.gitrepo = gitrepo
        else:
            self.gitrepo = GitRepoWrapper(cachedir=self.cachedir, repo=self.REPO)

        self.strategy = None
        self.strategies = []

        self.indexed_at = False
        self.updated_at = None
        self.update()

    def update(self, email_cache=None):
        if email_cache:
            self.email_cache = email_cache
        self.gitrepo.update()
        self.index_files()
        self.indexed_at = datetime.datetime.now()
        self.cache_keywords()
        self.updated_at = datetime.datetime.now()

    def index_files(self):
        self.BOTMETA = {}
        self.MODULES = {}
        self.MODULE_NAMES = []
        self.MODULE_NAMESPACE_DIRECTORIES = []

        self.load_meta()

        for fn in self.gitrepo.module_files:
            if os.path.isdir(fn):
                continue
            mname = os.path.basename(fn)
            mname = mname.replace('.py', '').replace('.ps1', '')
            if mname.startswith('__'):
                continue
            mdata = {
                'name': mname,
                'repo_filename': fn,
                'filename': fn
            }
            if fn not in self.MODULES:
                self.MODULES[fn] = mdata.copy()
            else:
                self.MODULES[fn].update(mdata)

        self.MODULE_NAMESPACE_DIRECTORIES = [os.path.dirname(x) for x in self.gitrepo.module_files]
        self.MODULE_NAMESPACE_DIRECTORIES = sorted(set(self.MODULE_NAMESPACE_DIRECTORIES))

        # make a list of names by enumerating the files
        self.MODULE_NAMES = [os.path.basename(x) for x in self.gitrepo.module_files]
        self.MODULE_NAMES = [x for x in self.MODULE_NAMES if x.endswith('.py') or x.endswith('.ps1')]
        self.MODULE_NAMES = [x.replace('.ps1', '').replace('.py', '') for x in self.MODULE_NAMES]
        self.MODULE_NAMES = [x for x in self.MODULE_NAMES if not x.startswith('__')]
        self.MODULE_NAMES = sorted(set(self.MODULE_NAMES))

        # make a list of names by calling ansible-doc
        checkoutdir = self.gitrepo.checkoutdir
        checkoutdir = os.path.abspath(checkoutdir)
        cmd = '. {}/hacking/env-setup; ansible-doc -t module -F'.format(checkoutdir)
        logging.debug(cmd)
        (rc, so, se) = run_command(cmd, cwd=checkoutdir)
        if rc:
            raise Exception("'ansible-doc' command failed (%s, %s %s)" % (rc, so, se))
        lines = so.split('\n')
        for line in lines:
            parts = line.split()
            parts = [x.strip() for x in parts]
            if len(parts) != 2 or checkoutdir not in line:
                continue
            mname = parts[0]
            if mname not in self.MODULE_NAMES:
                self.MODULE_NAMES.append(mname)
            fpath = parts[1]
            fpath = fpath.replace(checkoutdir + '/', '')
            if fpath not in self.MODULES:
                self.MODULES[fpath] = {
                    'name': mname,
                    'repo_filename': fpath,
                    'filename': fpath
                }

        _modules = self.MODULES.copy()
        for k, v in _modules.items():
            kparts = os.path.splitext(k)
            if kparts[-1] == '.ps1':
                _k = kparts[0] + '.py'
                checkpath = os.path.join(checkoutdir, _k)
                if not os.path.isfile(checkpath):
                    _k = k
            else:
                _k = k
            ME = ModuleExtractor(os.path.join(checkoutdir, _k), email_cache=self.email_cache)
            if k not in self.BOTMETA['files']:
                self.BOTMETA['files'][k] = {
                    'deprecated': os.path.basename(k).startswith('_'),
                    'labels': os.path.dirname(k).split('/'),
                    'authors': ME.authors,
                    'maintainers': ME.authors,
                    'maintainers_keys': [],
                    'notified': ME.authors,
                    'ignored': [],
                    'support': ME.metadata.get('supported_by', 'community'),
                    'metadata': ME.metadata.copy()
                }
            else:
                bmeta = self.BOTMETA['files'][k].copy()
                bmeta['metadata'] = ME.metadata.copy()
                if 'notified' not in bmeta:
                    bmeta['notified'] = []
                if 'maintainers' not in bmeta:
                    bmeta['maintainers'] = []
                if not bmeta.get('supported_by'):
                    bmeta['supported_by'] = ME.metadata.get('supported_by', 'community')
                if 'authors' not in bmeta:
                    bmeta['authors'] = []
                for x in ME.authors:
                    if x not in bmeta['authors']:
                        bmeta['authors'].append(x)
                    if x not in bmeta['maintainers']:
                        bmeta['maintainers'].append(x)
                    if x not in bmeta['notified']:
                        bmeta['notified'].append(x)
                if not bmeta.get('labels'):
                    bmeta['labels'] = os.path.dirname(k).split('/')
                bmeta['deprecated'] = os.path.basename(k).startswith('_')
                self.BOTMETA['files'][k].update(bmeta)

            # clean out the ignorees
            if 'ignored' in self.BOTMETA['files'][k]:
                for ignoree in self.BOTMETA['files'][k]['ignored']:
                    for thiskey in ['maintainers', 'notified']:
                        while ignoree in self.BOTMETA['files'][k][thiskey]:
                            self.BOTMETA['files'][k][thiskey].remove(ignoree)

            # write back to the modules
            self.MODULES[k].update(self.BOTMETA['files'][k])

    def load_meta(self):
        if self.botmetafile is not None:
            with open(self.botmetafile, 'rb') as f:
                rdata = f.read()
        else:
            fp = '.github/BOTMETA.yml'
            rdata = self.gitrepo.get_file_content(fp)
        self.BOTMETA = BotMetadataParser.parse_yaml(rdata)

    def cache_keywords(self):
        for k, v in self.BOTMETA['files'].items():
            if not v.get('keywords'):
                continue
            for kw in v['keywords']:
                if kw not in self.KEYWORDS:
                    self.KEYWORDS[kw] = k

    def clean_body(self, body, internal=False):
        body = body.lower()
        body = body.strip()
        for SC in self.STOPCHARS:
            if body.startswith(SC):
                body = body.lstrip(SC)
                body = body.strip()
            if body.endswith(SC):
                body = body.rstrip(SC)
                body = body.strip()
            if internal and SC in body:
                body = body.replace(SC, '')
                body = body.strip()
        body = body.strip()
        return body

    def match(self, issuewrapper):
        iw = issuewrapper
        matchdata = self.match_components(
            iw.title,
            iw.body,
            iw.template_data.get('component_raw'),
            files=iw.files
        )
        return matchdata

    def match_components(self, title, body, component, files=None):
        """Make a list of matching files with metadata"""
        self.strategy = None
        self.strategies = []

        # No matching necessary for PRs, but should provide consistent api
        if files:
            matched_filenames = files[:]
        else:
            matched_filenames = []
            if component is None:
                return matched_filenames

            component = component.encode('ascii', 'ignore')
            logging.debug('match "{}"'.format(component))

            delimiters = ['\n', ',', ' + ', ' & ']
            delimited = False
            for delimiter in delimiters:
                if delimiter in component:
                    delimited = True
                    components = component.split(delimiter)
                    for _component in components:
                        _matches = self._match_component(title, body, _component)
                        self.strategies.append(self.strategy)

                        # bypass for blacklist
                        if None in _matches:
                            _matches = []

                        matched_filenames += _matches

                    # do not process any more delimiters
                    break

            if not delimited:
                matched_filenames += self._match_component(title, body, component)
                self.strategies.append(self.strategy)

                # bypass for blacklist
                if None in matched_filenames:
                    return []

        # reduce subpaths
        if matched_filenames:
            matched_filenames = self.reduce_filepaths(matched_filenames)

        # create metadata for each matched file
        component_matches = []
        matched_filenames = sorted(set(matched_filenames))
        for fn in matched_filenames:
            component_matches.append(self.get_meta_for_file(fn))

        return component_matches

    def _match_component(self, title, body, component):
        """Find matches for a single line"""
        matched_filenames = []

        # context sets the path prefix to narrow the search window
        if 'module_util' in title.lower() or 'module_util' in component.lower():
            context = 'lib/ansible/module_utils'
        elif 'module util' in title.lower() or 'module util' in component.lower():
            context = 'lib/ansible/module_utils'
        elif 'module' in title.lower() or 'module' in component.lower():
            context = 'lib/ansible/modules'
        elif 'dynamic inventory' in title.lower() or 'dynamic inventory' in component.lower():
            context = 'contrib/inventory'
        elif 'inventory script' in title.lower() or 'inventory script' in component.lower():
            context = 'contrib/inventory'
        elif 'inventory plugin' in title.lower() or 'inventory plugin' in component.lower():
            context = 'lib/ansible/plugins/inventory'
        else:
            context = None

        if not component:
            return []

        if component not in self.STOPWORDS and component not in self.STOPCHARS:

            if not matched_filenames:
                matched_filenames += self.search_by_keywords(component, exact=True)
                if matched_filenames:
                    self.strategy = 'search_by_keywords'

            if not matched_filenames:
                matched_filenames += self.search_by_module_name(component)
                if matched_filenames:
                    self.strategy = 'search_by_module_name'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_module_globs(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_module_globs'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_modules(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_modules'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_generic(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_generic'

            if not matched_filenames:
                matched_filenames += self.search_by_regex_urls(component)
                if matched_filenames:
                    self.strategy = 'search_by_regex_urls'

            if not matched_filenames:
                matched_filenames += self.search_by_tracebacks(component)
                if matched_filenames:
                    self.strategy = 'search_by_tracebacks'

            if not matched_filenames:
                matched_filenames += self.search_by_filepath(component, context=context)
                if matched_filenames:
                    self.strategy = 'search_by_filepath'

            if not matched_filenames:
                matched_filenames += self.search_by_filepath(component, partial=True)
                if matched_filenames:
                    self.strategy = 'search_by_filepath[partial]'

            if not matched_filenames:
                matched_filenames += self.search_by_keywords(component, exact=False)
                if matched_filenames:
                    self.strategy = 'search_by_keywords!exact'

            if matched_filenames:
                matched_filenames += self.include_modules_from_test_targets(matched_filenames)

        return matched_filenames

    def search_by_module_name(self, component):
        matches = []

        component = self.clean_body(component)

        # docker-container vs. docker_container
        if component not in self.MODULE_NAMES:
            component = component.replace('-', '_')

        if component in self.MODULE_NAMES:
            mmatch = self.find_module_match(component)
            if mmatch:
                if isinstance(mmatch, list):
                    for x in mmatch:
                        matches.append(x['repo_filename'])
                else:
                    matches.append(mmatch['repo_filename'])

        return matches

    def search_by_keywords(self, component, exact=True):
        """Simple keyword search"""
        component = component.lower()
        matches = []
        if component in self.STOPWORDS:
            matches = [None]
        elif component in self.KEYWORDS:
            matches = [self.KEYWORDS[component]]
        elif not exact:
            for k, v in self.KEYWORDS.items():
                if ' ' + k + ' ' in component or ' ' + k + ' ' in component.lower():
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif ' ' + k + ':' in component or ' ' + k + ':' in component.lower():
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif component.endswith(' ' + k) or component.lower().endswith(' ' + k):
                    logging.debug('keyword match: {}'.format(k))
                    matches.append(v)
                elif (k in component or k in component.lower()) and k in self.BLACKLIST:
                    logging.debug('blacklist match: {}'.format(k))
                    matches.append(None)
        return matches

    def search_by_regex_urls(self, body):
        # http://docs.ansible.com/ansible/latest/copy_module.html
        # http://docs.ansible.com/ansible/latest/dev_guide/developing_modules.html
        # http://docs.ansible.com/ansible/latest/postgresql_db_module.html
        # [helm module](https//docs.ansible.com/ansible/2.4/helm_module.html)
        # Windows module: win_robocopy\nhttp://docs.ansible.com/ansible/latest/win_robocopy_module.html
        # Examples:\n* archive (https://docs.ansible.com/ansible/archive_module.html)\n* s3_sync (https://docs.ansible.com/ansible/s3_sync_module.html)
        # https//github.com/ansible/ansible/blob/devel/lib/ansible/modules/windows/win_dsc.ps1L228

        matches = []

        urls = re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            body
        )
        if urls:
            for url in urls:
                url = url.rstrip(')')
                if '/blob' in url and url.endswith('.py'):
                    parts = url.split('/')
                    bindex = parts.index('blob')
                    fn = '/'.join(parts[bindex+2:])
                    matches.append(fn)
                elif '_module.html' in url:
                    parts = url.split('/')
                    fn = parts[-1].replace('_module.html', '')
                    choices = [x for x in self.gitrepo.files if '/' + fn in x or '/_' + fn in x]
                    choices = [x for x in choices if 'lib/ansible/modules' in x]
                    if len(choices) > 1:
                        choices = [x for x in choices if '/' + fn + '.py' in x or '/' + fn + '.ps1' in x or '/_' + fn + '.py' in x]
                    if not choices:
                        pass
                    elif len(choices) == 1:
                        matches.append(choices[0])
                    else:
                        pass
                else:
                    pass

        return matches

    def search_by_regex_modules(self, body):
        # foo module
        # foo and bar modules
        # foo* modules
        # foo* module
        body = body.lower()
        logging.debug('attempt regex match on: {}'.format(body))

        # https://www.tutorialspoint.com/python/python_reg_expressions.htm
        patterns = [
            r'\:\n(\S+)\.py',
            r'(\S+)\.py',
            r'\-(\s+)(\S+)(\s+)module',
            r'\`ansible_module_(\S+)\.py\`',
            r'module(\s+)\-(\s+)(\S+)',
            r'module(\s+)(\S+)',
            r'\`(\S+)\`(\s+)module',
            r'(\S+)(\s+)module',
            r'the (\S+) command',
            r'(\S+) \(.*\)',
            r'(\S+)\-module',
            r'modules/(\S+)',
            r'module\:(\s+)\`(\S+)\`',
            r'module\: (\S+)',
            r'module (\S+)',
            r'module `(\S+)`',
            r'module: (\S+)',
            r'new (\S+) module',
            r'the (\S+) module',
            r'the \"(\S+)\" module',
            r':\n(\S+) module',
            r'(\S+) module',
            r'(\S+) core module',
            r'(\S+) extras module',
            r':\n\`(\S+)\` module',
            r'\`(\S+)\` module',
            r'`(\S+)` module',
            r'(\S+)\* modules',
            r'(\S+) and (\S+)',
            r'(\S+) or (\S+)',
            r'(\S+) \+ (\S+)',
            r'(\S+) \& (\S)',
            r'(\S+) and (\S+) modules',
            r'(\S+) or (\S+) module',
            r'(\S+)_module',
            r'action: (\S+)',
            r'action (\S+)',
            r'ansible_module_(\S+)\.py',
            r'ansible_module_(\S+)',
            r'ansible_modules_(\S+)\.py',
            r'ansible_modules_(\S+)',
            r'(\S+) task',
            r'(\s+)\((\S+)\)',
            r'(\S+)(\s+)(\S+)(\s+)modules',
            r'(\S+)(\s+)module\:(\s+)(\S+)',
            r'\-(\s+)(\S+)(\s+)module',
            r'\:(\s+)(\S+)(\s+)module',
            r'\-(\s+)ansible(\s+)(\S+)(\s+)(\S+)(\s+)module',
            r'.*(\s+)(\S+)(\s+)module.*'
        ]

        matches = []

        logging.debug('check patterns against: {}'.format(body))

        for pattern in patterns:
            mobj = re.match(pattern, body, re.M | re.I)
            if mobj:
                logging.debug('pattern {} matched on "{}"'.format(pattern, body))

                for x in range(0, mobj.lastindex+1):
                    try:
                        mname = mobj.group(x)
                        logging.debug('mname: {}'.format(mname))
                        if mname == body:
                            continue
                        mname = self.clean_body(mname)
                        if not mname.strip():
                            continue
                        mname = mname.strip().lower()
                        if ' ' in mname:
                            continue
                        if '/' in mname:
                            continue

                        mname = mname.replace('.py', '').replace('.ps1', '')
                        logging.debug('--> {}'.format(mname))

                        # attempt to match a module
                        module_match = self.find_module_match(mname)

                        if not module_match:
                            pass
                        elif isinstance(module_match, list):
                            for m in module_match:
                                matches.append(m['repo_filename'])
                        elif isinstance(module_match, dict):
                            matches.append(module_match['repo_filename'])
                    except Exception as e:
                        logging.error(e)

                if matches:
                    break

        return matches

    def search_by_regex_module_globs(self, body):
        # All AWS modules
        # BigIP modules
        # NXOS modules
        # azurerm modules

        matches = []
        body = self.clean_body(body)
        logging.debug('try globs on: {}'.format(body))

        keymap = {
            'all': None,
            'ec2': 'lib/ansible/modules/cloud/amazon',
            'ec2_*': 'lib/ansible/modules/cloud/amazon',
            'aws': 'lib/ansible/modules/cloud/amazon',
            'amazon': 'lib/ansible/modules/cloud/amazon',
            'google': 'lib/ansible/modules/cloud/google',
            'gce': 'lib/ansible/modules/cloud/google',
            'gcp': 'lib/ansible/modules/cloud/google',
            'bigip': 'lib/ansible/modules/network/f5',
            'nxos': 'lib/ansible/modules/network/nxos',
            'azure': 'lib/ansible/modules/cloud/azure',
            'azurerm': 'lib/ansible/modules/cloud/azure',
            'openstack': 'lib/ansible/modules/cloud/openstack',
            'ios': 'lib/ansible/modules/network/ios',
        }

        regexes = [
            r'(\S+) ansible modules',
            r'all (\S+) based modules',
            r'all (\S+) modules',
            r'.* all (\S+) modules.*',
            r'(\S+) modules',
            r'(\S+\*) modules',
            r'all cisco (\S+\*) modules',
        ]

        mobj = None
        for x in regexes:
            mobj = re.match(x, body)
            if mobj:
                logging.debug('matched glob: {}'.format(x))
                break

        if not mobj:
            logging.debug('no glob matches')

        if mobj:
            keyword = mobj.group(1)
            if not keyword.strip():
                pass
            elif keyword in keymap:
                if keymap[keyword]:
                    matches.append(keymap[keyword])
            else:
                if '*' in keyword:
                    keyword = keyword.replace('*', '')

                # check for directories first
                fns = [x for x in self.MODULE_NAMESPACE_DIRECTORIES if keyword in x]

                # check for files second
                if not fns:
                    fns = [x for x in self.gitrepo.module_files
                           if 'lib/ansible/modules' in x and keyword in x]

                if fns:
                    matches += fns

        if matches:
            matches = sorted(set(matches))

        return matches

    def search_by_regex_generic(self, body):
        # foo dynamic inventory script
        # foo filter

        # https://www.tutorialspoint.com/python/python_reg_expressions.htm
        patterns = [
            [r'(.*) action plugin', 'lib/ansible/plugins/action'],
            [r'(.*) inventory plugin', 'lib/ansible/plugins/inventory'],
            [r'(.*) dynamic inventory', 'contrib/inventory'],
            [r'(.*) dynamic inventory (script|file)', 'contrib/inventory'],
            [r'(.*) inventory script', 'contrib/inventory'],
            [r'(.*) filter', 'lib/ansible/plugins/filter'],
            [r'(.*) jinja filter', 'lib/ansible/plugins/filter'],
            [r'(.*) jinja2 filter', 'lib/ansible/plugins/filter'],
            [r'(.*) template filter', 'lib/ansible/plugins/filter'],
            [r'(.*) fact caching plugin', 'lib/ansible/plugins/cache'],
            [r'(.*) fact caching module', 'lib/ansible/plugins/cache'],
            [r'(.*) lookup plugin', 'lib/ansible/plugins/lookup'],
            [r'(.*) lookup', 'lib/ansible/plugins/lookup'],
            [r'(.*) callback plugin', 'lib/ansible/plugins/callback'],
            [r'(.*)\.py callback', 'lib/ansible/plugins/callback'],
            [r'callback plugin (.*)', 'lib/ansible/plugins/callback'],
            [r'(.*) stdout callback', 'lib/ansible/plugins/callback'],
            [r'stdout callback (.*)', 'lib/ansible/plugins/callback'],
            [r'stdout_callback (.*)', 'lib/ansible/plugins/callback'],
            [r'(.*) connection plugin', 'lib/ansible/plugins/connection'],
            [r'(.*) connection type', 'lib/ansible/plugins/connection'],
            [r'(.*) connection', 'lib/ansible/plugins/connection'],
            [r'(.*) transport', 'lib/ansible/plugins/connection'],
            [r'connection=(.*)', 'lib/ansible/plugins/connection'],
            [r'connection: (.*)', 'lib/ansible/plugins/connection'],
            [r'connection (.*)', 'lib/ansible/plugins/connection'],
            [r'strategy (.*)', 'lib/ansible/plugins/strategy'],
            [r'(.*) strategy plugin', 'lib/ansible/plugins/strategy'],
            [r'(.*) module util', 'lib/ansible/module_utils'],
            [r'ansible-galaxy (.*)', 'lib/ansible/galaxy'],
            [r'ansible-playbook (.*)', 'lib/ansible/playbook'],
            [r'ansible/module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'lib/ansible/module_utils/(.*)', 'lib/ansible/module_utils'],
            [r'(\S+) documentation fragment', 'lib/ansible/utils/module_docs_fragments'],
        ]

        body = self.clean_body(body)

        matches = []

        for pattern in patterns:
            mobj = re.match(pattern[0], body, re.M | re.I)
            if mobj:
                logging.debug('pattern hit: {}'.format(pattern))
                fname = mobj.group(1)
                fname = fname.lower()

                fpath = os.path.join(pattern[1], fname)

                if fpath in self.gitrepo.files:
                    matches.append(fpath)
                elif os.path.join(pattern[1], fname + '.py') in self.gitrepo.files:
                    fname = os.path.join(pattern[1], fname + '.py')
                    matches.append(fname)
                else:
                    # fallback to the directory
                    matches.append(pattern[1])

        return matches

    def search_by_tracebacks(self, body):
        matches = []

        if 'Traceback (most recent call last)' in body:
            lines = body.split('\n')
            for line in lines:
                line = line.strip()
                if line.startswith('DistributionNotFound'):
                    matches = ['setup.py']
                    break
                elif line.startswith('File'):
                    fn = line.split()[1]
                    for SC in self.STOPCHARS:
                        fn = fn.replace(SC, '')

                    if 'ansible_module_' in fn:
                        fn = os.path.basename(fn)
                        fn = fn.replace('ansible_module_', '')
                        matches = [fn]
                    elif 'cli/playbook.py' in fn:
                        fn = 'lib/ansible/cli/playbook.py'
                    elif 'module_utils' in fn:
                        idx = fn.find('module_utils/')
                        fn = 'lib/ansible/' + fn[idx:]
                    elif 'ansible/' in fn:
                        idx = fn.find('ansible/')
                        fn1 = fn[idx:]

                        if 'bin/' in fn1:
                            if not fn1.startswith('bin'):
                                idx = fn1.find('bin/')
                                fn1 = fn1[idx:]
                            if fn1.endswith('.py'):
                                fn1 = fn1.rstrip('.py')
                        elif 'cli/' in fn1:
                            idx = fn1.find('cli/')
                            fn1 = fn1[idx:]
                            fn1 = 'lib/ansible/' + fn1
                        elif 'lib' not in fn1:
                            fn1 = 'lib/' + fn1

                        if fn1 not in self.gitrepo.files:
                            pass

        return matches

    def search_by_filepath(self, body, partial=False, context=None):
        """Find known filepaths in body"""
        matches = []

        body = self.clean_body(body)

        if not body:
            return []
        if body.lower() in self.STOPCHARS:
            return []
        if body.lower() in self.STOPWORDS:
            return []

        # 'inventory manager' vs. 'inventory/manager'
        if partial and ' ' in body:
            body = body.replace(' ', '/')

        if 'site-packages' in body:
            res = re.match('(.*)/site-packages/(.*)', body)
            body = res.group(2)
        if 'modules/core/' in body:
            body = body.replace('modules/core/', 'modules/')
        if 'modules/extras/' in body:
            body = body.replace('modules/extras/', 'modules/')
        if 'ansible-modules-core/' in body:
            body = body.replace('ansible-modules-core/', '/')
        if 'ansible-modules-extras/' in body:
            body = body.replace('ansible-modules-extras/', '/')
        if body.startswith('ansible/lib/ansible'):
            body = body.replace('ansible/lib', 'lib')
        if body.startswith('ansible/') and not body.startswith('ansible/modules'):
            body = body.replace('ansible/', '', 1)
        if 'module/' in body:
            body = body.replace('module/', 'modules/')

        logging.debug('search filepath [{}] [{}]: {}'.format(context, partial, body))

        if len(body) < 2:
            return []

        if '/' in body:
            body_paths = body.split('/')
        elif ' ' in body:
            body_paths = body.split()
            body_paths = [x.strip() for x in body_paths if x.strip()]
        else:
            body_paths = [body]

        if 'networking' in body_paths:
            ix = body_paths.index('networking')
            body_paths[ix] = 'network'

        if 'plugin' in body_paths:
            ix = body_paths.index('plugin')
            body_paths[ix] = 'plugins'

        if not context or 'lib/ansible/modules' in context:
            mmatch = self.find_module_match(body)
            if mmatch:
                if isinstance(mmatch, list) and len(mmatch) > 1:
                    # only allow for exact prefix globbing here ...
                    if [x for x in mmatch if x['repo_filename'].startswith(body)]:
                        return [x['repo_filename'] for x in mmatch]
                elif isinstance(mmatch, list):
                    return [x['repo_filename'] for x in mmatch]
                else:
                    return [mmatch['repo_filename']]

        if body in self.gitrepo.files:
            matches = [body]
        else:
            for fn in self.gitrepo.files:
                # limit the search set if a context is given
                if context is not None and not fn.startswith(context):
                    continue

                if fn.endswith(body) or fn.endswith(body + '.py') or fn.endswith(body + '.ps1'):
                    # ios_config.py -> test_ios_config.py vs. ios_config.py
                    bn1 = os.path.basename(body)
                    bn2 = os.path.basename(fn)
                    if bn2.startswith(bn1):
                        matches = [fn]
                        break

                if partial:
                    # netapp_e_storagepool storage module
                    # lib/ansible/modules/storage/netapp/netapp_e_storagepool.py

                    # if all subpaths are in this filepath, it is a match
                    bp_total = 0
                    fn_paths = fn.split('/')
                    fn_paths.append(fn_paths[-1].replace('.py', '').replace('.ps1', ''))
                    for bp in body_paths:
                        if bp in fn_paths:
                            bp_total += 1

                    if bp_total == len(body_paths):
                        matches = [fn]
                        break
                    elif bp_total > 1:
                        if (float(bp_total) / float(len(body_paths))) >= (2.0 / 3.0):
                            if fn not in matches:
                                matches.append(fn)

        if matches:
            tr = []
            for match in matches[:]:
                # reduce to longest path
                for m in matches:
                    if match == m:
                        continue
                    if len(m) < len(match) and match.startswith(m):
                        tr.append(m)

            for r in tr:
                if r in matches:
                    logging.debug('trimming {}'.format(r))
                    matches.remove(r)

        matches = sorted(set(matches))
        logging.debug('return: {}'.format(matches))

        return matches

    def reduce_filepaths(self, matches):

        # unique
        _matches = []
        for _match in matches:
            if _match not in _matches:
                _matches.append(_match)
        matches = _matches[:]

        # squash to longest path
        if matches:
            tr = []
            for match in matches[:]:
                # reduce to longest path
                for m in matches:
                    if match == m:
                        continue
                    if m is None or match is None:
                        continue
                    if len(m) < len(match) and (match.startswith(m) or match.endswith(m)):
                        tr.append(m)

            for r in tr:
                if r in matches:
                    matches.remove(r)

        return matches

    def include_modules_from_test_targets(self, matches):
        """Map test targets to the module files"""
        new_matches = []
        for match in matches:
            if not match:
                continue
            # include modules from test targets
            if 'test/integration/targets' in match:
                paths = match.split('/')
                tindex = paths.index('targets')
                mname = paths[tindex+1]
                mrs = self.find_module_match(mname, exact=True)
                if mrs:
                    if not isinstance(mrs, list):
                        mrs = [mrs]
                    for mr in mrs:
                        new_matches.append(mr['repo_filename'])
        return new_matches

    def get_meta_for_file(self, filename):
        meta = {
            'repo_filename': filename,
            'name': os.path.basename(filename).split('.')[0],
            'notify': [],
            'assign': [],
            'authors': [],
            'committers': [],
            'maintainers': [],
            'labels': [],
            'ignore': [],
            'support': None,
            'supported_by': None,
            'deprecated': False,
            'topic': None,
            'subtopic': None,
            'namespace': None,
            'namespace_maintainers': []
        }

        populated = False
        filenames = [filename, os.path.splitext(filename)[0]]

        # powershell meta is in the python file
        if filename.endswith('.ps1'):
            pyfile = filename.replace('.ps1', '.py')
            if pyfile in self.BOTMETA['files']:
                filenames.append(pyfile)

        botmeta_entries = self.file_indexer._filenames_to_keys(filenames)
        for entry in botmeta_entries:
            fdata = self.BOTMETA['files'][entry].copy()

            if 'authors' in fdata:
                meta['authors'] = fdata['authors']
            if 'maintainers' in fdata:
                meta['notify'] += fdata['maintainers']
                meta['assign'] += fdata['maintainers']
                meta['maintainers'] += fdata['maintainers']
            if 'notified' in fdata:
                meta['notify'] += fdata['notified']
            if 'labels' in fdata:
                meta['labels'] += fdata['labels']
            if 'ignore' in fdata:
                meta['ignore'] += fdata['ignore']
            if 'ignored' in fdata:
                meta['ignore'] += fdata['ignored']
            if 'support' in fdata:
                if isinstance(fdata['support'], list):
                    meta['support'] = fdata['support'][0]
                else:
                    meta['support'] = fdata['support']
            elif 'supported_by' in fdata:
                if isinstance(fdata['supported_by'], list):
                    meta['support'] = fdata['supported_by'][0]
                else:
                    meta['support'] = fdata['supported_by']

            if 'deprecated' in fdata:
                meta['deprecated'] = fdata['deprecated']

            populated = True

        # walk up the tree for more meta
        paths = filename.split('/')
        for idx, x in enumerate(paths):
            thispath = '/'.join(paths[:(0-idx)])
            if thispath in self.BOTMETA['files']:
                fdata = self.BOTMETA['files'][thispath].copy()
                if 'support' in fdata and not meta['support']:
                    if isinstance(fdata['support'], list):
                        meta['support'] = fdata['support'][0]
                    else:
                        meta['support'] = fdata['support']
                if 'labels' in fdata:
                    meta['labels'] += fdata['labels']
                if 'maintainers' in fdata:
                    meta['notify'] += fdata['maintainers']
                    meta['assign'] += fdata['maintainers']
                    meta['maintainers'] += fdata['maintainers']
                if 'ignore' in fdata:
                    meta['ignore'] += fdata['ignore']
                if 'notified' in fdata:
                    meta['notify'] += fdata['notified']

        if 'lib/ansible/modules' in filename:
            topics = [x for x in paths if x not in ['lib', 'ansible', 'modules']]
            topics = [x for x in topics if x != os.path.basename(filename)]
            if len(topics) == 2:
                meta['topic'] = topics[0]
                meta['subtopic'] = topics[1]
            elif len(topics) == 1:
                meta['topic'] = topics[0]

            meta['namespace'] = '/'.join(topics)

        # set namespace maintainers (skip !modules for now)
        if filename.startswith('lib/ansible/modules'):
            ns = meta.get('namespace')
            keys = self.BOTMETA['files'].keys()
            keys = [x for x in keys if x.startswith(os.path.join('lib/ansible/modules', ns))]
            ignored = []

            for key in keys:
                meta['namespace_maintainers'] += self.BOTMETA['files'][key].get('maintainers', [])
                ignored += self.BOTMETA['files'][key].get('ignored', [])

            for ignoree in ignored:
                while ignoree in meta['namespace_maintainers']:
                    meta['namespace_maintainers'].remove(ignoree)

        # new modules should default to "community" support
        if filename.startswith('lib/ansible/modules') and filename not in self.gitrepo.files:
            meta['support'] = 'community'
            meta['supported_by'] = 'community'

        # test targets for modules should inherit from their modules
        if filename.startswith('test/integration/targets') and filename not in self.BOTMETA['files']:
            whitelist = [
                'labels', 'ignore', 'deprecated', 'authors',
                'assign', 'maintainers', 'notify', 'topic',
                'subtopic', 'support'
            ]

            paths = filename.split('/')
            tindex = paths.index('targets')
            mname = paths[tindex+1]
            mmatch = self._find_module_match(mname, exact=True)
            if mmatch:
                mmeta = self.get_meta_for_file(mmatch[0]['repo_filename'])
                for k, v in mmeta.items():
                    if k in whitelist and v:
                        if isinstance(meta[k], list):
                            meta[k] = sorted(set(meta[k] + v))
                        elif not meta[k]:
                            meta[k] = v

            # make new test targets community by default
            if not meta['support'] and not meta['supported_by']:
                meta['support'] = 'community'

        # it's okay to remove things from legacy-files.txt
        if filename == 'test/sanity/pep8/legacy-files.txt' and not meta['support']:
            meta['support'] = 'community'

        # fallback to core support
        if not meta['support']:
            meta['support'] = 'core'

        # align support and supported_by
        if meta['support'] != meta['supported_by']:
            if meta['support'] and not meta['supported_by']:
                meta['supported_by'] = meta['support']
            elif not meta['support'] and meta['supported_by']:
                meta['support'] = meta['supported_by']

        # clean up the result
        _meta = meta.copy()
        for k, v in _meta.items():
            if isinstance(v, list):
                meta[k] = sorted(set(v))

        # walk up the botmeta tree looking for ignores to include
        if meta.get('repo_filename'):
            namespace_paths = os.path.dirname(meta['repo_filename'])
            namespace_paths = namespace_paths.split('/')
            for x in reversed(range(0, len(namespace_paths) + 1)):
                this_ns_path = '/'.join(namespace_paths[:x])
                if not this_ns_path:
                    continue
                logging.debug('check {}'.format(this_ns_path))
                if this_ns_path in self.BOTMETA['files']:
                    this_ignore = self.BOTMETA['files'][this_ns_path].get('ignore') or \
                        self.BOTMETA['files'][this_ns_path].get('ignored') or \
                        self.BOTMETA['files'][this_ns_path].get('ignores')
                    logging.debug('ignored: {}'.format(this_ignore))
                    if this_ignore:
                        for username in this_ignore:
                            if username not in meta['ignore']:
                                meta['ignore'].append(username)

        # process ignores AGAIN.
        if meta.get('ignore'):
            for k, v in meta.items():
                if k == 'ignore':
                    continue
                if not isinstance(v, list):
                    continue
                for ignoree in meta['ignore']:
                    if ignoree in v:
                        meta[k].remove(ignoree)

        return meta

    def find_module_match(self, pattern, exact=False):
        '''Exact module name matching'''
        logging.debug('find_module_match for "{}"'.format(pattern))

        candidate = None

        BLACKLIST = [
            'module_utils',
            'callback',
            'network modules',
            'networking modules',
            'windows modules'
        ]

        if not pattern:
            return None

        # https://github.com/ansible/ansible/issues/19755
        if pattern == 'setup':
            pattern = 'lib/ansible/modules/system/setup.py'

        if '/facts.py' in pattern or ' facts.py' in pattern:
            pattern = 'lib/ansible/modules/system/setup.py'

        # https://github.com/ansible/ansible/issues/18527
        #   docker-container -> docker_container
        if '-' in pattern:
            pattern = pattern.replace('-', '_')

        if 'module_utils' in pattern:
            # https://github.com/ansible/ansible/issues/20368
            return None
        elif 'callback' in pattern:
            return None
        elif 'lookup' in pattern:
            return None
        elif 'contrib' in pattern and 'inventory' in pattern:
            return None
        elif pattern.lower() in BLACKLIST:
            return None

        candidate = self._find_module_match(pattern, exact=exact)

        if not candidate:
            candidate = self._find_module_match(os.path.basename(pattern))

        if not candidate and '/' in pattern and not pattern.startswith('lib/'):
            ppy = None
            ps1 = None
            if not pattern.endswith('.py') and not pattern.endswith('.ps1'):
                ppy = pattern + '.py'
            if not pattern.endswith('.py') and not pattern.endswith('.ps1'):
                ps1 = pattern + '.ps1'

            for mf in self.gitrepo.module_files:
                if pattern in mf:
                    if mf.endswith(pattern) or mf.endswith(ppy) or mf.endswith(ps1):
                        candidate = mf
                        break

        return candidate

    def _find_module_match(self, pattern, exact=False):
        logging.debug('matching on {}'.format(pattern))

        matches = []

        if isinstance(pattern, unicode):
            pattern = pattern.encode('ascii', 'ignore')

        logging.debug('_find_module_match: {}'.format(pattern))

        noext = pattern.replace('.py', '').replace('.ps1', '')

        # exact is looking for a very precise name such as "vmware_guest"
        if exact:
            candidates = [pattern]
        else:
            candidates = [pattern, '_' + pattern, noext, '_' + noext]

        for k, v in self.MODULES.items():
            if v['name'] in candidates:
                logging.debug('match {} on name: {}'.format(k, v['name']))
                matches = [v]
                break

        if not matches:
            # search by key ... aka the filepath
            for k, v in self.MODULES.items():
                if k == pattern:
                    logging.debug('match {} on key: {}'.format(k, k))
                    matches = [v]
                    break

        # spellcheck
        if not exact and not matches and '/' not in pattern:
            _pattern = pattern
            if not isinstance(_pattern, unicode):
                _pattern = _pattern.decode('utf-8')
            candidates = []
            for k, v in self.MODULES.items():
                vname = v['name']
                if not isinstance(vname, unicode):
                    vname = vname.decode('utf-8')
                jw = jaro_winkler(vname, _pattern)
                if jw > .9:
                    candidates.append((jw, k))
            for candidate in candidates:
                matches.append(self.MODULES[candidate[1]])

        return matches
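# A minimal usage sketch (not from the project): match_components() is the
# public matching entrypoint shown above. It takes an issue title, body, and
# the raw "component" template field, and returns one metadata dict per
# matched file (built by get_meta_for_file). The cachedir value and the issue
# text are hypothetical.
cm = AnsibleComponentMatcher(cachedir='/tmp/acm.cache')
matches = cm.match_components(
    'copy module ignores mode',            # issue title
    'SUMMARY\nThe copy module does ...',   # issue body
    'copy module'                          # component_raw from the template
)
for m in matches:
    print(m['repo_filename'], m['maintainers'])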
def main():
    pprint(sys.argv)
    dest = sys.argv[1]
    print('dest: %s' % dest)

    # get_valid_labels('ansible/ansible')
    # /home/jtanner/.ansibullbot/cache/ansible/ansible/labels.pickle
    with open(os.path.expanduser('~/.ansibullbot/cache/ansible/ansible/labels.pickle'), 'rb') as f:
        labels = pickle.load(f)
    valid_labels = [x.name for x in labels[1]]

    FILEMAP_FILENAME = 'FILEMAP.json'
    COMPONENTMAP_FILENAME = 'COMPONENTMAP.json'

    FI = FileIndexer(
        checkoutdir=os.path.expanduser(
            '~/.ansibullbot/cache/ansible.files.checkout'
        ),
        cmap=COMPONENTMAP_FILENAME,
    )

    module_cache_file = '/tmp/mi-modules.json'
    if not os.path.isfile(module_cache_file):
        module_maintainers = get_maintainers_mapping()
        MI = ModuleIndexer(maintainers=module_maintainers)
        MI.get_ansible_modules()
        with open(module_cache_file, 'wb') as f:
            f.write(json.dumps(MI.modules, sort_keys=True, indent=2))
        modules = MI.modules
    else:
        with open(module_cache_file, 'rb') as f:
            modules = json.loads(f.read())

    macro_teams = {
        'Qalthos,gundalow,privateip': 'openswitch',
        'Qalthos,ganeshrn,gundalow,privateip,rcarrillocruz,trishnaguha': 'networking',
        'GGabriele,jedelman8,mikewiebe,privateip,rahushen,rcarrillocruz,trishnaguha': 'nxos',
        'emonty,j2sol,juliakreger,rcarrillocruz,shrews,thingee': 'openstack',
        'chrishoffman,manuel-sousa,romanek-adam': 'rabbitmq',
        'alikins,barnabycourt,flossware,vritant': 'rhn',
        'Qalthos,amitsi,gundalow,privateip': 'netvisor',
        'haroldwongms,nitzmahone,tstringer': 'azure',
        'dagwieers,jborean93,jhawkesworth': 'windows',
        'dagwieers,dav1x,jctanner': 'vmware',
        'isharacomix,jrrivers,privateip': 'cumulus',
        'chiradeep,giorgos-nikolopoulos': 'netscaler',
        'ericsysmin,grastogi23,khaltore': 'avi',
        'ghjm,jlaska,matburt,wwitzel3': 'tower',
        'hulquest,lmprice,timuster': 'netapp',
    }

    usermap = {
        'mpdehaan': False
    }
    namemap = {
        'Shrews': 'shrews'
    }
    exclusions = {
        '*': ['chouseknecht', 'Java1Guy', 'franckcuny', 'mhite', 'bennojoy',
              'risaacson', 'whenrik'],
        'network/wakeonlan': ['dagwiers'],
    }

    removed = get_removed_maintainers()

    teams = {}
    data = {}
    data['files'] = {}

    # merge the moduleindexer data
    for k, v in modules.items():
        fp = v.get('filepath')
        if not fp or not fp.startswith('lib/ansible'):
            continue
        data['files'][k] = {}
        if v['_maintainers']:
            data['files'][k]['maintainers'] = [x for x in v['_maintainers']]
        if v['authors']:
            if 'maintainers' not in data['files'][k]:
                data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] += v['authors']
            data['files'][k]['maintainers'] = sorted(set(data['files'][k]['maintainers']))

        # validate each maintainer exists
        if 'maintainers' in data['files'][k]:
            maintainers = []
            for x in data['files'][k]['maintainers']:
                if x in exclusions['*']:
                    continue
                if x in namemap:
                    x = namemap[x]
                if x in usermap:
                    if usermap[x]:
                        maintainers.append(x)
                else:
                    if x == 'ansible':
                        usermap['ansible'] = True
                        maintainers.append(x)
                        continue
                    res = requests.get('https://github.com/%s' % x)
                    if res.status_code == 200:
                        usermap[x] = True
                        maintainers.append(x)
                    else:
                        usermap[x] = False
            data['files'][k]['maintainers'] = sorted(set(maintainers))
            if not data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)

    # merge the removed people
    for k, v in removed.items():
        k = os.path.join('lib/ansible/modules', k)
        v = sorted(set(v))
        if k in data['files']:
            if 'maintainers' in data['files'][k]:
                for vx in v:
                    if vx in data['files'][k]['maintainers']:
                        data['files'][k]['maintainers'].remove(vx)
                        if 'ignored' not in data['files'][k]:
                            data['files'][k]['ignored'] = []
                        data['files'][k]['ignored'].append(vx)
                if not data['files'][k]['maintainers']:
                    data['files'][k].pop('maintainers', None)
                    #import epdb; epdb.st()

    # merge the fileindexer data
    for k in FI.files:
        #if 'contrib/inventory' in k:
        #    import epdb; epdb.st()
        #print(k)
        try:
            klabels = FI.get_component_labels(valid_labels, [k])
            if klabels:
                klabels = [x for x in klabels if not x.startswith('c:')]
                if not klabels:
                    continue
                if k not in data['files']:
                    data['files'][k] = {}
                if 'labels' not in data['files'][k]:
                    data['files'][k]['labels'] = []
                data['files'][k]['labels'] += klabels
        except UnicodeDecodeError:
            continue

        keywords = FI.get_keywords_for_file(k)
        if keywords:
            if k not in data['files']:
                data['files'][k] = {}
            if 'keywords' not in data['files'][k]:
                data['files'][k]['keywords'] = []
            data['files'][k]['keywords'] += keywords
            #import epdb; epdb.st()

    '''
    # calculate all teams
    for k,v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = sorted(set(v['maintainers']))
        key = ','.join(maintainers)
        if key not in teams:
            teams[key] = []
        teams[key].append(k)

    # rank and show
    steams = sorted(teams, key=len, reverse=True)
    for x in steams[0:15]:
        if x in macro_teams:
            continue
        pprint(teams[x])
        print(x)
        import epdb; epdb.st()
    import epdb; epdb.st()
    '''

    for k, v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = v.get('maintainers')
        for idx, x in enumerate(maintainers):
            if x == 'ansible':
                maintainers[idx] = '$team_ansible'
        if maintainers == ['$team_ansible']:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        if len(maintainers) == 1:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue

        mkey = ','.join(sorted(set(maintainers)))
        if mkey in macro_teams:
            maintainers = ['$team_%s' % macro_teams[mkey]]
            data['files'][k]['maintainers'] = ' '.join(maintainers)
        else:
            # partial matching
            match = None
            subnames = sorted(set(maintainers))
            for sn in subnames:
                filtered = [x for x in subnames if x != sn]
                fkey = ','.join(filtered)
                if fkey in macro_teams:
                    match = fkey
            if match:
                to_clear = match.split(',')
                maintainers = [x for x in maintainers if x not in to_clear]
                data['files'][k]['maintainers'] = ' '.join(maintainers)

    # fix deprecations
    safe_names = [x for x in FI.files if all(c in string.printable for c in x)]
    remove = []
    for k, v in data['files'].items():
        maintainers = v.get('maintainers')
        if maintainers:
            if 'DEPRECATED' in data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)
                data['files'][k]['deprecated'] = True
        bn = os.path.basename(k)
        if bn.startswith('_') and bn != '__init__.py' and '/modules/' in k:
            '''
            data['files'][k]['deprecated'] = True
            if 'maintainers' in data['files'][k]:
                data['files'][k].pop('maintainers', None)
            '''
            remove.append(k)

        # get rid of files no longer in the repo
        if k not in safe_names:
            remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    # remove any keys where maintainers == authors
    remove = []
    for k, v in data['files'].items():
        if v.keys() != ['maintainers']:
            continue
        if v['maintainers'] != modules[k]['authors']:
            continue
        remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    #####################################
    # add special notifies
    #####################################
    data['files']['lib/ansible/modules/cloud/amazon/'] = {
        'notify': ['willthames']
    }

    #####################################
    # reduce to namespace maintainers
    #####################################
    groups = {}
    for k, v in data['files'].items():
        dn = os.path.dirname(k)
        if dn not in groups:
            groups[dn] = {
                'matches': [],
                'values': []
            }
        groups[dn]['matches'].append(k)
        if v not in groups[dn]['values']:
            groups[dn]['values'].append(v)
    for k, v in groups.items():
        if not len(v['values']) == 1:
            continue
        if len(v['matches']) == 1:
            continue
        #print(k)
        #pprint(v)
        newk = k + '/'
        data['files'][newk] = v['values'][0]
        for pf in v['matches']:
            data['files'].pop(pf, None)
        if newk in removed:
            import epdb; epdb.st()

    #####################################
    # make a sorted dict
    #####################################
    files = data['files']
    data['files'] = OrderedDict()
    fkeys = sorted(files.keys())
    fkeys = [x.replace('lib/ansible/modules', '$modules') for x in fkeys]
    fkeys = sorted(set(fkeys))
    for fkey in fkeys:
        if fkey.startswith('$modules'):
            mkey = fkey.replace('$modules', 'lib/ansible/modules')
            data['files'][fkey] = files[mkey]
        else:
            data['files'][fkey] = files[fkey]

    data['macros'] = OrderedDict()
    data['macros']['modules'] = 'lib/ansible/modules'

    # invert macro_teams: team name -> comma-joined member key
    macro_dict = {}
    for key, team in macro_teams.items():
        macro_dict[team] = key

    data['macros']['team_ansible'] = []
    for k in sorted(macro_dict.keys()):
        team = macro_dict[k].split(',')
        if len(team) < 10:
            team = " ".join(team)
        data['macros']['team_%s' % k] = team

    # if maintainers is the only subkey, make the primary value a string
    for k, v in data['files'].items():
        keys = v.keys()
        if keys == ['maintainers']:
            if isinstance(v['maintainers'], list):
                data['files'][k] = " ".join(v['maintainers'])
            else:
                data['files'][k] = v['maintainers']
        for xk in ['ignored', 'notified', 'maintainers']:
            if xk in data['files'][k]:
                if not isinstance(data['files'][k][xk], (str, unicode)):
                    data['files'][k][xk] = " ".join(data['files'][k][xk])

    # write it once with ryaml to make it ordered
    ryaml = rYAML()
    (fo, fn) = tempfile.mkstemp()
    with open(fn, 'wb') as f:
        ryaml.dump(data, f)

    # read it back in
    with open(fn, 'rb') as f:
        ylines = f.readlines()

    for idx, x in enumerate(ylines):
        x = x.rstrip()
        x = x.replace('!!omap', '')
        if x.endswith(' {}'):
            x = x.replace(' {}', '')
        if x.startswith('-'):
            x = x.replace('-', ' ', 1)
        ylines[idx] = x

        # fold wrapped scalar continuations back onto the previous line
        if x.startswith(' ') and ':' not in x and '-' not in x:
            ylines[idx-1] += ' ' + x.strip()
            ylines[idx] = ''

    ylines = [x for x in ylines if x.strip()]
    ylines = [HEADER] + ylines

    with open(dest, 'wb') as f:
        f.write('\n'.join(ylines))
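# --- invocation sketch (illustrative; not part of the original source) ---
# main() takes the destination path as argv[1], dumps the ordered data with
# ruamel, and then rewrites the dump into the flat BOTMETA layout. The script
# name below is assumed:
#
#   $ python botmeta_generator.py /tmp/BOTMETA.yml
#
# Given the transforms above ('$modules' substitution, maintainer lists
# joined into space-separated strings, 'team_*' macros), the emitted file
# should roughly have this shape (entries are illustrative):
#
#   files:
#     $modules/cloud/amazon/:
#       notify: willthames
#     $modules/system/setup.py: maintainer1 maintainer2
#   macros:
#     modules: lib/ansible/modules
#     team_ansible: []
#     team_avi: ericsysmin grastogi23 khaltore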
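# --- standalone sketch of the team-macro reduction above (illustrative) ---
# Collapses a maintainer list to a '$team_*' macro when the sorted,
# comma-joined set matches a macro_teams key, with the same drop-one-name
# partial-match fallback used in main(); on a partial match the covered
# names are dropped without adding the macro, mirroring the code above.
# The sample team map in the usage comments is made up.

def reduce_to_macros(maintainers, macro_teams):
    mkey = ','.join(sorted(set(maintainers)))
    if mkey in macro_teams:
        return ['$team_%s' % macro_teams[mkey]]
    # partial match: try removing one name at a time
    match = None
    subnames = sorted(set(maintainers))
    for sn in subnames:
        fkey = ','.join([x for x in subnames if x != sn])
        if fkey in macro_teams:
            match = fkey
    if match:
        to_clear = match.split(',')
        return [x for x in maintainers if x not in to_clear]
    return maintainers

# e.g. with a made-up team map:
#   reduce_to_macros(['alice', 'bob'], {'alice,bob': 'demo'})          -> ['$team_demo']
#   reduce_to_macros(['alice', 'bob', 'carol'], {'alice,bob': 'demo'}) -> ['carol']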