def pullrequest_filepath_exists(self, filepath):
    ''' Check if a file exists on the submitters branch '''
    # https://github.com/ansible/ansibullbot/issues/406
    # https://developer.github.com/v3/repos/contents/
    #   GET /repos/:owner/:repo/readme
    # "contents_url":
    #   "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}",
    # self.pullrequest.head
    #   - ref --> branch name
    #   - repo.full_name

    sha = self.pullrequest.head.sha
    pdata = None
    resp = None
    cachefile = os.path.join(
        self.cachedir,
        u'issues',
        to_text(self.number),
        u'shippable_yml.pickle'
    )

    try:
        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                pdata = pickle_load(f)
    except Exception as e:
        logging.error(u'failed to unpickle %s %s' % (cachefile, to_text(e)))

    if not pdata or pdata[0] != sha:
        if self.pullrequest.head.repo:
            url = self.pullrequest.head.repo.url + u'/contents/' + filepath
            resp = self.pullrequest._requester.requestJson(
                u"GET",
                url,
                input={u'ref': self.pullrequest.head.ref}
            )
        else:
            # https://github.com/ansible/ansible/pull/19891
            # Sometimes the repo repo/branch has disappeared
            resp = [None]

        pdata = [sha, resp]
        with open(cachefile, 'wb') as f:
            pickle_dump(pdata, f)
    else:
        resp = pdata[1]

    result = False
    if resp[0]:
        result = True

    return result

def load_update_fetch(self, property_name):
    '''Fetch a get() property for an object'''

    edata = None
    events = []
    updated = None
    update = False
    write_cache = False

    self.repo.update()

    pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.repo.updated_at:
            update = True
            write_cache = True

    # pull all events if timestamp is behind or no events cached
    if update or not events:
        write_cache = True
        updated = self.get_current_time()

        try:
            methodToCall = getattr(self.repo, u'get_' + property_name)
        except Exception as e:
            logging.error(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            else:
                raise Exception(u'unable to get %s' % property_name)

        events = [x for x in methodToCall()]

    if C.DEFAULT_PICKLE_ISSUES:
        if write_cache or not os.path.isfile(pfile):
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

    return events

def load_update_fetch(self, property_name):
    '''Fetch a get() property for an object'''

    edata = None
    events = []
    updated = None
    update = False
    write_cache = False

    self.repo.update()

    pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.repo.updated_at:
            update = True
            write_cache = True

    # pull all events if timestamp is behind or no events cached
    if update or not events:
        write_cache = True
        updated = self.get_current_time()

        try:
            methodToCall = getattr(self.repo, u'get_' + property_name)
        except Exception as e:
            logging.error(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            else:
                raise Exception(u'unable to get %s' % property_name)

        events = [x for x in methodToCall()]

    if write_cache or not os.path.isfile(pfile):
        # need to dump the pickle back to disk
        edata = [updated, events]
        with open(pfile, 'wb') as f:
            pickle_dump(edata, f)

    return events

def load_issue(self, number):
    pfile = os.path.join(self.cachedir, u'issues', to_text(number), u'issue.pickle')
    if os.path.isfile(pfile):
        with open(pfile, 'rb') as f:
            try:
                issue = pickle_load(f)
            except TypeError:
                return False
        return issue
    else:
        return False

def load_pullrequest(self, number):
    pfile = os.path.join(self.cachedir, u'issues', to_text(number), u'pullrequest.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)
    if os.path.isfile(pfile):
        with open(pfile, 'rb') as f:
            issue = pickle_load(f)
        return issue
    else:
        return False

def get_pullrequest_status(self, force_fetch=False):
    fetched = False
    jdata = None
    pdata = None

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self.github.get_request(surl)

            if isinstance(jdata, dict):
                # https://github.com/ansible/ansibullbot/issues/959
                logging.error(
                    u'Got the following error while fetching PR status: %s',
                    jdata.get(u'message'))
                logging.error(jdata)
                return []

            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self.github.get_request(surl)
        # FIXME? should we self.log_ci_status(jdata) here too?
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    return jdata

def load_issue(self, number):
    pfile = os.path.join(
        self.cachedir,
        u'issues',
        to_text(number),
        u'issue.pickle'
    )
    if os.path.isfile(pfile):
        with open(pfile, 'rb') as f:
            issue = pickle_load(f)
        return issue
    else:
        return False

def _load_cache(self):
    if not os.path.isdir(self.cachedir):
        os.makedirs(self.cachedir)
    if not os.path.isfile(self.cachefile):
        logging.info(u'!%s' % self.cachefile)
        return None
    try:
        with open(self.cachefile, 'rb') as f:
            cachedata = pickle_load(f)
    except Exception as e:
        logging.debug(e)
        logging.info(u'%s failed to load' % self.cachefile)
        cachedata = None

    return cachedata

def _load_cache(self):
    if not os.path.isdir(self.cachedir):
        os.makedirs(self.cachedir)
    if not os.path.isfile(self.cachefile):
        logging.info(u'!%s' % self.cachefile)
        return None
    try:
        with open(self.cachefile, 'rb') as f:
            cachedata = pickle_load(f)
    except Exception as e:
        logging.debug(e)
        logging.info(u'%s failed to load' % self.cachefile)
        cachedata = None

    # only post-process when the pickle actually loaded
    if cachedata is not None:
        cachedata[u'history'] = self._fix_comments_with_no_body(
            cachedata[u'history'])

    return cachedata

def get_members(self, organization):
    """Get members of an organization

    Args:
        organization: name of the organization

    Returns:
        A list of GitHub login belonging to the organization
    """
    members = []

    update = False
    write_cache = False
    now = self.get_current_time()
    gh_org = self._connect().get_organization(organization)

    cachedir = os.path.join(self.cachedir_base, organization)
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
    cachefile = os.path.join(cachedir, 'members.pickle')

    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            mdata = pickle_load(f)
        members = mdata[1]
        if mdata[0] < gh_org.updated_at:
            update = True
    else:
        update = True
        write_cache = True

    if update:
        members = gh_org.get_members()
        members = [x.login for x in members]

    # save the data
    if write_cache:
        mdata = [now, members]
        with open(cachefile, 'wb') as f:
            pickle_dump(mdata, f)

    return members

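# Sketch only: reading back the members.pickle written by get_members() above.
# It assumes the [timestamp, logins] layout used there; the helper name and
# cache path handling are illustrative, not part of the bot's API.
def _read_members_cache_sketch(cachefile):
    import pickle

    with open(cachefile, 'rb') as f:
        fetched_at, logins = pickle.load(f)
    return fetched_at, logins
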
def jobs(self):
    if self._jobs is None:
        if self.build_id:
            if not os.path.isdir(self._cachedir):
                os.makedirs(self._cachedir)
            cache_file = os.path.join(
                self._cachedir, u'timeline_%s.pickle' % self.build_id)

            resp = fetch(TIMELINE_URL_FMT % self.build_id)
            if resp is None:
                data = None
                if os.path.isfile(cache_file):
                    logging.info(
                        u'timeline was probably removed, load it from cache'
                    )
                    with open(cache_file, 'rb') as f:
                        data = pickle_load(f)
            else:
                data = resp.json()
                data = (strip_time_safely(data['lastChangedOn']), data)
                logging.info(u'writing %s' % cache_file)
                with open(cache_file, 'wb') as f:
                    pickle_dump(data, f)

            if data is not None:
                data = data[1]
                self._jobs = [
                    r for r in data['records'] if r['type'] == 'Job'
                ]
                self._updated_at = strip_time_safely(
                    data['lastChangedOn'])  # FIXME
                self._stages = [
                    r for r in data['records'] if r['type'] == 'Stage'
                ]  # FIXME
            else:
                self._jobs = []
                self._updated_at = strip_time_safely('1970-01-01')
                self._stages = []
        else:
            self._jobs = []
    return self._jobs

def get_artifact(self, name, url):
    if not os.path.isdir(self._cachedir):
        os.makedirs(self._cachedir)

    data = None
    cache_file = os.path.join(
        self._cachedir,
        u'%s_%s.pickle' % (name.replace(' ', '-'), self.build_id))

    if os.path.isfile(cache_file):
        logging.info(u'loading %s' % cache_file)
        with open(cache_file, 'rb') as f:
            data = pickle_load(f)

    if data is None or (data and data[0] < self.updated_at) or not data[1]:
        if data:
            logging.info(u'fetching artifacts: stale, previous from %s' % data[0])
        else:
            logging.info(u'fetching artifacts: stale, no previous data')

        resp = fetch(url, stream=True)
        if resp is not None:
            with BytesIO() as data:
                for chunk in resp.iter_content(chunk_size=128):
                    data.write(chunk)
                artifact_zip = ZipFile(data)

                artifact_data = []
                for fn in artifact_zip.namelist():
                    if 'ansible-test-' not in fn:
                        continue
                    with artifact_zip.open(fn) as f:
                        artifact_data.append(json.load(f))

                data = (self.updated_at, artifact_data)
                logging.info(u'writing %s' % cache_file)
                with open(cache_file, 'wb') as f:
                    pickle_dump(data, f)

    if data:
        return data[1]

def artifacts(self):
    if self._artifacts is None:
        # FIXME deduplicate code
        if not os.path.isdir(self._cachedir):
            os.makedirs(self._cachedir)

        data = None
        cache_file = os.path.join(self._cachedir, u'artifacts_%s.pickle' % self.build_id)
        if os.path.isfile(cache_file):
            logging.info(u'load artifacts cache')
            with open(cache_file, 'rb') as f:
                data = pickle_load(f)

        if data is None or (data and data[0] < self.updated_at) or not data[1]:
            if data:
                logging.info(
                    u'fetching artifacts: stale, previous from %s' % data[0])
            else:
                logging.info(
                    u'fetching artifacts: stale, no previous data')

            resp = fetch(ARTIFACTS_URL_FMT % self.build_id)
            if resp is not None:
                data = [
                    a for a in resp.json()['value']
                    if a['name'].startswith('Bot')
                ]
                data = (self.updated_at, data)
                logging.info(u'writing %s' % cache_file)
                with open(cache_file, 'wb') as f:
                    pickle_dump(data, f)

        if data:
            self._artifacts = data[1]

    return self._artifacts

def load_issues(self, state=u'open', filter=None):
    issues = []
    gfiles = glob.glob(u'%s/issues/*/issue.pickle' % self.cachedir)
    for gf in gfiles:
        if filter:
            gf_parts = gf.split(u'/')
            this_number = gf_parts[-2]
            this_number = int(this_number)
            if this_number not in filter:
                continue
        logging.debug(u'load %s' % gf)
        issue = None
        try:
            with open(gf, 'rb') as f:
                issue = pickle_load(f)
        except EOFError as e:
            # this is bad, get rid of it
            logging.error(e)
            os.remove(gf)
        if issue:
            issues.append(issue)
    return issues

def main():
    pprint(sys.argv)
    dest = sys.argv[1]
    print('dest: %s' % dest)

    # get_valid_labels('ansible/ansible')
    # /home/jtanner/.ansibullbot/cache/ansible/ansible/labels.pickle
    with open(
            os.path.expanduser(
                '~/.ansibullbot/cache/ansible/ansible/labels.pickle'),
            'rb') as f:
        labels = pickle_load(f)
    valid_labels = [x.name for x in labels[1]]

    FILEMAP_FILENAME = 'FILEMAP.json'
    COMPONENTMAP_FILENAME = 'COMPONENTMAP.json'

    FI = FileIndexer(
        checkoutdir=os.path.expanduser(
            '~/.ansibullbot/cache/ansible.files.checkout'),
        cmap=COMPONENTMAP_FILENAME,
    )

    module_cache_file = '/tmp/mi-modules.json'
    if not os.path.isfile(module_cache_file):
        module_maintainers = get_maintainers_mapping()
        MI = ModuleIndexer(maintainers=module_maintainers)
        MI.get_ansible_modules()
        with open(module_cache_file, 'wb') as f:
            f.write(json.dumps(MI.modules, sort_keys=True, indent=2))
        modules = MI.modules
    else:
        with open(module_cache_file, 'rb') as f:
            modules = json.loads(f.read())

    macro_teams = {
        'Qalthos,gundalow,privateip': 'openswitch',
        'Qalthos,ganeshrn,gundalow,privateip,rcarrillocruz,trishnaguha': 'networking',
        'GGabriele,jedelman8,mikewiebe,privateip,rahushen,rcarrillocruz,trishnaguha': 'nxos',
        'emonty,j2sol,juliakreger,rcarrillocruz,shrews,thingee': 'openstack',
        'chrishoffman,manuel-sousa,romanek-adam': 'rabbitmq',
        'alikins,barnabycourt,flossware,vritant': 'rhn',
        'Qalthos,amitsi,gundalow,privateip': 'netvisor',
        'haroldwongms,nitzmahone,tstringer': 'azure',
        'dagwieers,jborean93,jhawkesworth': 'windows',
        'dagwieers,dav1x,jctanner': 'vmware',
        'isharacomix,jrrivers,privateip': 'cumulus',
        'chiradeep,giorgos-nikolopoulos': 'netscaler',
        'ericsysmin,grastogi23,khaltore': 'avi',
        'ghjm,jlaska,matburt,wwitzel3': 'tower',
        'hulquest,lmprice,timuster': 'netapp',
    }

    usermap = {'mpdehaan': False}
    namemap = {'Shrews': 'shrews'}
    exclusions = {
        '*': [
            'chouseknecht', 'Java1Guy', 'franckcuny', 'mhite', 'bennojoy',
            'risaacson', 'whenrik'
        ],
        'network/wakeonlan': ['dagwiers'],
    }

    removed = get_removed_maintainers()

    teams = {}
    data = {}
    data['files'] = {}

    # merge the moduleindexer data
    for k, v in modules.items():
        fp = v.get('filepath')
        if not fp or not fp.startswith('lib/ansible'):
            continue
        data['files'][k] = {}
        if v['_maintainers']:
            data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] = [x for x in v['_maintainers']]
        if v['authors']:
            if 'maintainers' not in data['files'][k]:
                data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] += v['authors']
            data['files'][k]['maintainers'] = sorted(
                set(data['files'][k]['maintainers']))

        # validate each maintainer exists
        if 'maintainers' in data['files'][k]:
            maintainers = []
            for x in data['files'][k]['maintainers']:
                if x in exclusions['*']:
                    continue
                if x in namemap:
                    x = namemap[x]
                if x in usermap:
                    if usermap[x]:
                        maintainers.append(x)
                else:
                    if x == 'ansible':
                        usermap['ansible'] = True
                        maintainers.append(x)
                        continue
                    res = requests.get('https://github.com/%s' % x)
                    if res.status_code == 200:
                        usermap[x] = True
                        maintainers.append(x)
                    else:
                        usermap[x] = False
            data['files'][k]['maintainers'] = sorted(set(maintainers))
            if not data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)

    # merge the removed people
    for k, v in removed.items():
        k = os.path.join('lib/ansible/modules', k)
        v = sorted(set(v))
        if k in data['files']:
            if 'maintainers' in data['files'][k]:
                for vx in v:
                    if vx in data['files'][k]['maintainers']:
                        data['files'][k]['maintainers'].remove(vx)
                        if 'ignored' not in data['files'][k]:
                            data['files'][k]['ignored'] = []
                        data['files'][k]['ignored'].append(vx)
                if not data['files'][k]['maintainers']:
                    data['files'][k].pop('maintainers', None)
                    #import epdb; epdb.st()

    # merge the fileindexer data
    for k in FI.files:
        #if 'contrib/inventory' in k:
        #    import epdb; epdb.st()
        #print(k)
        try:
            klabels = FI.get_component_labels(valid_labels, [k])
            if klabels:
                klabels = [x for x in klabels if not x.startswith('c:')]
                if not klabels:
                    continue
                if k not in data['files']:
                    data['files'][k] = {}
                if 'labels' not in data['files'][k]:
                    data['files'][k]['labels'] = []
                data['files'][k]['labels'] += klabels
        except UnicodeDecodeError:
            continue

        keywords = FI.get_keywords_for_file(k)
        if keywords:
            if k not in data['files']:
                data['files'][k] = {}
            if 'keywords' not in data['files'][k]:
                data['files'][k]['keywords'] = []
            data['files'][k]['keywords'] += keywords
            #import epdb; epdb.st()

    '''
    # calculate all teams
    for k,v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = sorted(set(v['maintainers']))
        key = ','.join(maintainers)
        if key not in teams:
            teams[key] = []
        teams[key].append(k)

    # rank and show
    steams = sorted(teams, key=len, reverse=True)
    for x in steams[0:15]:
        if x in macro_teams:
            continue
        pprint(teams[x])
        print(x)
        import epdb; epdb.st()
    import epdb; epdb.st()
    '''

    for k, v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = v.get('maintainers')
        for idx, x in enumerate(maintainers):
            if x == 'ansible':
                maintainers[idx] = '$team_ansible'
        if maintainers == ['$team_ansible']:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        if len(maintainers) == 1:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue

        mkey = ','.join(sorted(set(maintainers)))
        if mkey in macro_teams:
            maintainers = ['$team_%s' % macro_teams[mkey]]
            data['files'][k]['maintainers'] = ' '.join(maintainers)
        else:
            # partial matching
            match = None
            subnames = sorted(set(maintainers))
            for sn in subnames:
                filtered = [x for x in subnames if x != sn]
                fkey = ','.join(filtered)
                if fkey in macro_teams:
                    match = fkey
            if match:
                to_clear = match.split(',')
                maintainers = [x for x in maintainers if x not in to_clear]
                data['files'][k]['maintainers'] = ' '.join(maintainers)

    # fix deprecations
    safe_names = [x for x in FI.files if all(c in string.printable for c in x)]
    remove = []
    for k, v in data['files'].items():
        maintainers = v.get('maintainers')
        if maintainers:
            if 'DEPRECATED' in data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)
                data['files'][k]['deprecated'] = True
        bn = os.path.basename(k)
        if bn.startswith('_') and bn != '__init__.py' and '/modules/' in k:
            '''
            data['files'][k]['deprecated'] = True
            if 'maintainers' in data['files'][k]:
                data['files'][k].pop('maintainers', None)
            '''
            remove.append(k)

        # get rid of files no longer in the repo
        if k not in safe_names:
            remove.append(k)

    for x in remove:
        data['files'].pop(x, None)

    # remove any keys where maintainers == authors
    remove = []
    for k, v in data['files'].items():
        if v.keys() != ['maintainers']:
            continue
        if v['maintainers'] != modules[k]['authors']:
            continue
        remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    #####################################
    #   add special notifies
    #####################################
    data['files']['lib/ansible/modules/cloud/amazon/'] = {
        'notify': ['willthames']
    }

    #####################################
    #   reduce to namespace maintainers
    #####################################
    groups = {}
    for k, v in data['files'].items():
        dn = os.path.dirname(k)
        if dn not in groups:
            groups[dn] = {'matches': [], 'values': []}
        groups[dn]['matches'].append(k)
        if v not in groups[dn]['values']:
            groups[dn]['values'].append(v)

    for k, v in groups.items():
        if not len(v['values']) == 1:
            continue
        if len(v['matches']) == 1:
            continue
        #print(k)
        #pprint(v)
        newk = k + '/'
        data['files'][newk] = v['values'][0]
        for pf in v['matches']:
            data['files'].pop(pf, None)
        if newk in removed:
            import epdb
            epdb.st()

    #####################################
    #   make a sorted dict
    #####################################
    files = data['files']
    data['files'] = OrderedDict()
    fkeys = sorted(files.keys())
    fkeys = [x.replace('lib/ansible/modules', '$modules') for x in fkeys]
    fkeys = sorted(set(fkeys))
    for fkey in fkeys:
        if fkey.startswith('$modules'):
            mkey = fkey.replace('$modules', 'lib/ansible/modules')
            data['files'][fkey] = files[mkey]
        else:
            data['files'][fkey] = files[fkey]

    data['macros'] = OrderedDict()
    data['macros']['modules'] = 'lib/ansible/modules'
    macro_items = macro_teams.items()
    macro_items = [[x[1], x[0]] for x in macro_items]
    macro_dict = {}
    for x in macro_items:
        macro_dict[x[0]] = x[1]

    data['macros']['team_ansible'] = []
    keys = macro_dict.keys()
    for k in sorted(keys):
        team = macro_dict[k]
        team = team.split(',')
        if len(team) < 10:
            team = " ".join(team)
        data['macros']['team_%s' % k] = team

    # if maintainers is the only subkey, make the primary value a string
    for k, v in data['files'].items():
        keys = v.keys()
        if keys == ['maintainers']:
            if isinstance(v['maintainers'], list):
                data['files'][k] = " ".join(v['maintainers'])
            else:
                data['files'][k] = v['maintainers']
        for xk in ['ignored', 'notified', 'maintainers']:
            if xk in data['files'][k]:
                if not isinstance(data['files'][k][xk], (str, unicode)):
                    data['files'][k][xk] = " ".join(data['files'][k][xk])

    # write it once with ryaml to make it ordered
    ryaml = rYAML()
    (fo, fn) = tempfile.mkstemp()
    with open(fn, 'wb') as f:
        ryaml.dump(data, f)

    # read it back in
    with open(fn, 'rb') as f:
        ylines = f.readlines()

    phase = None
    for idx, x in enumerate(ylines):
        x = x.rstrip()
        x = x.replace('!!omap', '')
        if x.endswith(' {}'):
            x = x.replace(' {}', '')
        if x.startswith('-'):
            x = x.replace('-', ' ', 1)
        ylines[idx] = x

        if x.startswith(' ') and ':' not in x and '-' not in x:
            ylines[idx - 1] += ' ' + x.strip()
            ylines[idx] = ''

    ylines = [x for x in ylines if x.strip()]
    ylines = [HEADER] + ylines

    with open(dest, 'wb') as f:
        f.write('\n'.join(ylines))

def load_update_fetch(self, property_name, obj=None, force=False):
    '''Fetch a property for an issue object'''

    # A pygithub issue object has methods such as ...
    #   - get_events()
    #   - get_comments()
    # Those methods return a list with no update() property,
    # so we can't take advantage of the caching scheme used
    # for the issue it's self. Instead this function calls
    # those methods by their given name, and write the data
    # to a pickle file with a timestamp for the fetch time.
    # Upon later loading of the pickle, the timestamp is
    # compared to the issue's update_at timestamp and if the
    # pickle data is behind, the process will be repeated.

    edata = None
    events = []
    updated = None
    update = False
    write_cache = False

    pfile = os.path.join(self.full_cachedir, u'%s.pickle' % property_name)
    pdir = os.path.dirname(pfile)
    logging.debug(pfile)

    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.instance.updated_at:
            update = True
            write_cache = True

    baseobj = None
    if obj:
        if obj == u'issue':
            baseobj = self.instance
        elif obj == u'pullrequest':
            baseobj = self.pullrequest
    else:
        if hasattr(self.instance, u'get_' + property_name):
            baseobj = self.instance
        else:
            if self.pullrequest:
                if hasattr(self.pullrequest, u'get_' + property_name):
                    baseobj = self.pullrequest

    if not baseobj:
        logging.error(
            u'%s was not a property for the issue or the pullrequest'
            % property_name)
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb
            epdb.st()
        else:
            raise Exception(u'property error')

    # pull all events if timestamp is behind or no events cached
    if update or not events or force:
        write_cache = True
        updated = datetime.datetime.utcnow()

        if not hasattr(baseobj, u'get_' + property_name) \
                and hasattr(baseobj, property_name):
            # !callable properties
            try:
                methodToCall = getattr(baseobj, property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(to_text(e))
            events = methodToCall
        else:
            # callable properties
            try:
                methodToCall = getattr(baseobj, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(to_text(e))
            events = [x for x in methodToCall()]

    if C.DEFAULT_PICKLE_ISSUES:
        if write_cache or not os.path.isfile(pfile) or force:
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

    return events

def get_pullrequest_status(self, force_fetch=False):

    def sort_unique_statuses(statuses):
        '''reduce redundant statuses to the final run for each id'''
        result = []
        groups = []
        thisgroup = []
        for idx, x in enumerate(statuses):
            if not thisgroup:
                thisgroup.append(x)
                if idx == len(statuses) - 1:
                    groups.append(thisgroup)
                continue
            else:
                if thisgroup[-1][u'target_url'] == x[u'target_url']:
                    thisgroup.append(x)
                else:
                    groups.append(thisgroup)
                    thisgroup = []
                    thisgroup.append(x)

                if idx == len(statuses) - 1:
                    groups.append(thisgroup)

        for group in groups:
            group.sort(key=operator.itemgetter(u'updated_at'))
            result.append(group[-1])

        return result

    fetched = False
    jdata = None
    pdata = None

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(self.cachedir, u'issues', to_text(self.number), u'pr_status.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self._fetch_api_url(surl)
            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self._fetch_api_url(surl)
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    # remove intermediate duplicates
    #jdata = sort_unique_statuses(jdata)

    return jdata

def _get_module_blames(self):
    ''' Scrape the blame page for each module and store it '''

    keys = sorted(self.modules.keys())

    # scrape the data
    for k in keys:
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            self.committers[k] = {}
            continue

        ghash = self.last_commit_for_file(k)
        pfile = os.path.join(
            self.scraper_cache,
            k.replace(u'/', u'_') + u'.blame.pickle'
        )
        sargs = [u'ansible', u'ansible', u'devel', k]

        refresh = False
        if not os.path.isfile(pfile):
            refresh = True
        else:
            logging.debug(u'load {}'.format(pfile))
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            if pdata[0] == ghash:
                self.committers[k] = pdata[1]
                if len(pdata) == 3:
                    # use emailmap if available
                    emailmap = pdata[2]
                else:
                    emailmap = {}
            else:
                refresh = True

        if refresh:
            if self.gqlc:
                logging.debug(u'graphql blame usernames {}'.format(pfile))
                uns, emailmap = self.gqlc.get_usernames_from_filename_blame(*sargs)
            else:
                emailmap = {}  # scraping: emails not available
                logging.debug(u'www blame usernames {}'.format(pfile))
                uns = self.gws.get_usernames_from_filename_blame(*sargs)
            self.committers[k] = uns
            with open(pfile, 'wb') as f:
                pickle_dump((ghash, uns, emailmap), f)

        for email, github_id in emailmap.items():
            if email not in self.emails_cache:
                self.emails_cache[email] = github_id

    # add scraped logins to the map
    for k in keys:
        for idx, x in enumerate(self.commits[k]):
            if x[u'email'] in [u'@']:
                continue
            if x[u'email'] not in self.emails_cache:
                self.emails_cache[x[u'email']] = None
            if x[u'login']:
                self.emails_cache[x[u'email']] = x[u'login']
                continue
            xhash = x[u'hash']
            for ck, cv in six.iteritems(self.committers[k]):
                if xhash in cv:
                    self.emails_cache[x[u'email']] = ck
                    break

    # fill in what we can ...
    for k in keys:
        for idx, x in enumerate(self.commits[k]):
            if not x[u'login']:
                if x[u'email'] in [u'@']:
                    continue
                if self.emails_cache[x[u'email']]:
                    login = self.emails_cache[x[u'email']]
                    xhash = x[u'hash']
                    self.commits[k][idx][u'login'] = login
                    if login not in self.committers[k]:
                        self.committers[k][login] = []
                    if xhash not in self.committers[k][login]:
                        self.committers[k][login].append(xhash)

def get_module_commits(self):
    keys = self.modules.keys()
    keys = sorted(keys)
    for k in keys:
        self.commits[k] = []
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            continue

        mtime = os.path.getmtime(cpath)
        refresh = False
        pfile = os.path.join(
            self.scraper_cache,
            k.replace(u'/', u'_') + u'.commits.pickle'
        )

        if not os.path.isfile(pfile):
            refresh = True
        else:
            pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
            print(pfile)
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f, **pickle_kwargs)
            if pdata[0] == mtime:
                self.commits[k] = pdata[1]
            else:
                refresh = True

        if refresh:
            logging.info(u'refresh commit cache for %s' % k)
            cmd = u'cd %s; git log --follow %s' % (self.gitrepo.checkoutdir, k)
            (rc, so, se) = run_command(cmd)
            for line in to_text(so).split(u'\n'):
                if line.startswith(u'commit '):
                    commit = {
                        u'name': None,
                        u'email': None,
                        u'login': None,
                        u'hash': line.split()[-1],
                        u'date': None
                    }

                # Author: Matt Clay <*****@*****.**>
                if line.startswith(u'Author: '):
                    line = line.replace(u'Author: ', u'')
                    line = line.replace(u'<', u'')
                    line = line.replace(u'>', u'')
                    lparts = line.split()

                    if u'@' in lparts[-1]:
                        commit[u'email'] = lparts[-1]
                        commit[u'name'] = u' '.join(lparts[:-1])
                    else:
                        pass

                    if commit[u'email'] and \
                            u'noreply.github.com' in commit[u'email']:
                        commit[u'login'] = commit[u'email'].split(u'@')[0]

                # Date:   Sat Jan 28 23:28:53 2017 -0800
                if line.startswith(u'Date:'):
                    dstr = line.split(u':', 1)[1].strip()
                    dstr = u' '.join(dstr.split(u' ')[:-1])
                    ds = datetime.datetime.strptime(
                        to_text(dstr), u'%a %b %d %H:%M:%S %Y')
                    commit[u'date'] = ds
                    self.commits[k].append(commit)

            with open(pfile, 'wb') as f:
                pickle_dump((mtime, self.commits[k]), f)

def load_update_fetch(self, property_name, obj=None):
    '''Fetch a property for an issue object'''

    # A pygithub issue object has methods such as ...
    #   - get_events()
    #   - get_comments()
    # Those methods return a list with no update() property,
    # so we can't take advantage of the caching scheme used
    # for the issue it's self. Instead this function calls
    # those methods by their given name, and write the data
    # to a pickle file with a timestamp for the fetch time.
    # Upon later loading of the pickle, the timestamp is
    # compared to the issue's update_at timestamp and if the
    # pickle data is behind, the process will be repeated.

    edata = None
    events = []
    updated = None
    update = False
    write_cache = False

    pfile = os.path.join(
        self.cachedir,
        u'issues',
        to_text(self.instance.number),
        u'%s.pickle' % property_name
    )
    pdir = os.path.dirname(pfile)
    logging.debug(pfile)

    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.instance.updated_at:
            update = True
            write_cache = True

    baseobj = None
    if obj:
        if obj == u'issue':
            baseobj = self.instance
        elif obj == u'pullrequest':
            baseobj = self.pullrequest
    else:
        if hasattr(self.instance, u'get_' + property_name):
            baseobj = self.instance
        else:
            if self.pullrequest:
                if hasattr(self.pullrequest, u'get_' + property_name):
                    baseobj = self.pullrequest

    if not baseobj:
        logging.error(
            u'%s was not a property for the issue or the pullrequest'
            % property_name
        )
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb; epdb.st()
        else:
            raise Exception(u'property error')

    # pull all events if timestamp is behind or no events cached
    if update or not events:
        write_cache = True
        updated = self.get_current_time()

        if not hasattr(baseobj, u'get_' + property_name) \
                and hasattr(baseobj, property_name):
            # !callable properties
            try:
                methodToCall = getattr(baseobj, property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                else:
                    raise Exception(to_text(e))
            events = methodToCall
        else:
            # callable properties
            try:
                methodToCall = getattr(baseobj, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                else:
                    raise Exception(to_text(e))
            events = [x for x in methodToCall()]

    if write_cache or not os.path.isfile(pfile):
        # need to dump the pickle back to disk
        edata = [updated, events]
        with open(pfile, 'wb') as f:
            pickle_dump(edata, f)

    return events

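# The load_update_fetch() and get_pullrequest_status() variants in this file
# all share the same timestamp-based pickle cache scheme. The function below
# is a minimal standalone sketch of that pattern, for reference only;
# cache_path, fetch_items and remote_updated_at are hypothetical stand-ins,
# not part of the bot's API.
def _cached_fetch_sketch(cache_path, fetch_items, remote_updated_at):
    import datetime
    import os
    import pickle

    cached = None
    if os.path.isfile(cache_path):
        try:
            with open(cache_path, 'rb') as f:
                # cache layout mirrors the methods above: [fetched_at, items]
                cached = pickle.load(f)
        except Exception:
            cached = None

    # refetch when there is no usable cache or the remote object is newer
    if cached is None or cached[0] < remote_updated_at:
        items = list(fetch_items())
        with open(cache_path, 'wb') as f:
            pickle.dump([datetime.datetime.utcnow(), items], f)
        return items

    return cached[1]
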
def get_pullrequest_status(self, force_fetch=False):

    def sort_unique_statuses(statuses):
        '''reduce redundant statuses to the final run for each id'''
        result = []
        groups = []
        thisgroup = []
        for idx, x in enumerate(statuses):
            if not thisgroup:
                thisgroup.append(x)
                if idx == len(statuses) - 1:
                    groups.append(thisgroup)
                continue
            else:
                if thisgroup[-1][u'target_url'] == x[u'target_url']:
                    thisgroup.append(x)
                else:
                    groups.append(thisgroup)
                    thisgroup = []
                    thisgroup.append(x)

                if idx == len(statuses) - 1:
                    groups.append(thisgroup)

        for group in groups:
            group.sort(key=operator.itemgetter(u'updated_at'))
            result.append(group[-1])

        return result

    fetched = False
    jdata = None
    pdata = None

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self.github.get_request(surl)

            if isinstance(jdata, dict):
                # https://github.com/ansible/ansibullbot/issues/959
                logging.error(
                    u'Got the following error while fetching PR status: %s',
                    jdata.get(u'message'))
                logging.error(jdata)
                return []

            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self.github.get_request(surl)
        # FIXME? should we self.log_ci_status(jdata) here too?
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    # remove intermediate duplicates
    #jdata = sort_unique_statuses(jdata)

    return jdata
