def _dump_cache(self):
    """Write the events history (plus schema version and the issue's
    updated_at timestamp) to the pickle cache file.

    Raises:
        AssertionError: if any history entry's ``created_at`` is not a
            real ``datetime.datetime`` -- corrupt data must never be
            written to disk.
    """
    # Validate the condition itself.  The original
    # `any(x for x in self.history if not isinstance(...))` yielded the
    # *element* and tested its truthiness, which only worked by accident
    # for truthy dict entries.
    if any(not isinstance(x['created_at'], datetime.datetime) for x in self.history):
        logging.error(self.history)
        raise AssertionError(
            u'found a non-datetime created_at in events data')

    if not os.path.isdir(self.cachedir):
        os.makedirs(self.cachedir)

    # keep the timestamp so later loads can detect staleness
    cachedata = {
        u'version': self.SCHEMA_VERSION,
        u'updated_at': self.issue.instance.updated_at,
        u'history': self.history
    }

    try:
        with open(self.cachefile, 'wb') as f:
            pickle_dump(cachedata, f)
    except Exception as e:
        logging.error(e)
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb
            epdb.st()
        else:
            # previously raised a blank Exception(u'') which hid the cause
            raise Exception(u'failed to dump %s: %s' % (self.cachefile, to_text(e)))
def save_pullrequest(self, issue):
    """Pickle a pullrequest object into the per-issue cache directory."""
    cache_path = os.path.join(
        self.cachedir,
        u'issues',
        to_text(issue.number),
        u'pullrequest.pickle'
    )
    # create the per-issue directory on first use
    parent = os.path.dirname(cache_path)
    if not os.path.isdir(parent):
        os.makedirs(parent)
    with open(cache_path, 'wb') as fh:
        pickle_dump(issue, fh)
def save_issue(self, issue):
    """Pickle an issue object into the per-issue cache directory."""
    cache_path = os.path.join(
        self.cachedir,
        u'issues',
        to_text(issue.number),
        u'issue.pickle'
    )
    # make sure the per-issue directory exists before writing
    parent = os.path.dirname(cache_path)
    if not os.path.isdir(parent):
        os.makedirs(parent)
    logging.debug(u'dump %s' % cache_path)
    with open(cache_path, 'wb') as fh:
        pickle_dump(issue, fh)
def pullrequest_filepath_exists(self, filepath): ''' Check if a file exists on the submitters branch ''' # https://github.com/ansible/ansibullbot/issues/406 # https://developer.github.com/v3/repos/contents/ # GET /repos/:owner/:repo/readme # "contents_url": # "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}", # self.pullrequest.head # - ref --> branch name # - repo.full_name sha = self.pullrequest.head.sha pdata = None resp = None cachefile = os.path.join(self.cachedir, u'issues', to_text(self.number), u'shippable_yml.pickle') try: if os.path.isfile(cachefile): with open(cachefile, 'rb') as f: pdata = pickle_load(f) except Exception as e: logging.error(u'failed to unpickle %s %s' % (cachefile, to_text(e))) if not pdata or pdata[0] != sha: if self.pullrequest.head.repo: url = u'https://api.github.com/repos/' url += self.pullrequest.head.repo.full_name url += u'/contents/' url += filepath resp = self.pullrequest._requester.requestJson( u"GET", url, input={u'ref': self.pullrequest.head.ref}) else: # https://github.com/ansible/ansible/pull/19891 # Sometimes the repo repo/branch has disappeared resp = [None] pdata = [sha, resp] with open(cachefile, 'wb') as f: pickle_dump(pdata, f) else: resp = pdata[1] result = False if resp[0]: result = True return result
def load_update_fetch(self, property_name):
    '''Fetch a get() property for an object

    Calls self.repo.get_<property_name>() and caches the result in a
    timestamped pickle; the cached copy is reused until the repo's
    updated_at timestamp moves past the cached fetch time.
    '''

    edata = None            # raw [timestamp, events] payload from the pickle
    events = []
    updated = None
    update = False          # do we need to re-fetch from the API?
    write_cache = False     # do we need to rewrite the pickle?

    self.repo.update()

    pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            # unreadable cache -> force a re-fetch and a rewrite
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.repo.updated_at:
            update = True
            write_cache = True

    # pull all events if timestamp is behind or no events cached
    if update or not events:
        write_cache = True
        updated = self.get_current_time()
        try:
            methodToCall = getattr(self.repo, u'get_' + property_name)
        except Exception as e:
            logging.error(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            else:
                raise Exception(u'unable to get %s' % property_name)
        # drain the paginated iterator into a plain list
        events = [x for x in methodToCall()]

    if C.DEFAULT_PICKLE_ISSUES:
        if write_cache or not os.path.isfile(pfile):
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

    return events
def pullrequest_filepath_exists(self, filepath): ''' Check if a file exists on the submitters branch ''' # https://github.com/ansible/ansibullbot/issues/406 # https://developer.github.com/v3/repos/contents/ # GET /repos/:owner/:repo/readme # "contents_url": # "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}", # self.pullrequest.head # - ref --> branch name # - repo.full_name sha = self.pullrequest.head.sha pdata = None resp = None cachefile = os.path.join( self.cachedir, u'issues', to_text(self.number), u'shippable_yml.pickle' ) try: if os.path.isfile(cachefile): with open(cachefile, 'rb') as f: pdata = pickle_load(f) except Exception as e: logging.error(u'failed to unpickle %s %s' % (cachefile, to_text(e))) if not pdata or pdata[0] != sha: if self.pullrequest.head.repo: url = self.pullrequest.head.repo.url + u'/contents/' + filepath resp = self.pullrequest._requester.requestJson( u"GET", url, input={u'ref': self.pullrequest.head.ref} ) else: # https://github.com/ansible/ansible/pull/19891 # Sometimes the repo repo/branch has disappeared resp = [None] pdata = [sha, resp] with open(cachefile, 'wb') as f: pickle_dump(pdata, f) else: resp = pdata[1] result = False if resp[0]: result = True return result
def save_issue(self):
    """Persist self.instance as a pickle under this issue's cache dir."""
    dest = os.path.join(
        self.cachedir, u'issues',
        to_text(self.instance.number), u'issue.pickle'
    )
    dest_dir = os.path.dirname(dest)
    # lazily create the destination directory
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    logging.debug(u'dump %s' % dest)
    with open(dest, 'wb') as fh:
        pickle_dump(self.instance, fh)
def load_update_fetch(self, property_name):
    '''Fetch a get() property for an object

    Calls self.repo.get_<property_name>() and caches the result in a
    timestamped pickle; the cached copy is reused until the repo's
    updated_at timestamp moves past the cached fetch time.
    '''

    edata = None            # raw [timestamp, events] payload from the pickle
    events = []
    updated = None
    update = False          # do we need to re-fetch from the API?
    write_cache = False     # do we need to rewrite the pickle?

    self.repo.update()

    pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            # unreadable cache -> force a re-fetch and a rewrite
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.repo.updated_at:
            update = True
            write_cache = True

    # pull all events if timestamp is behind or no events cached
    if update or not events:
        write_cache = True
        updated = self.get_current_time()
        try:
            methodToCall = getattr(self.repo, u'get_' + property_name)
        except Exception as e:
            logging.error(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            else:
                raise Exception(u'unable to get %s' % property_name)
        # drain the paginated iterator into a plain list
        events = [x for x in methodToCall()]

    if write_cache or not os.path.isfile(pfile):
        # need to dump the pickle back to disk
        edata = [updated, events]
        with open(pfile, 'wb') as f:
            pickle_dump(edata, f)

    return events
def save_pullrequest(self, issue):
    """Write the pullrequest object to its per-issue pickle file."""
    target = os.path.join(
        self.cachedir, u'issues',
        to_text(issue.number), u'pullrequest.pickle'
    )
    target_dir = os.path.dirname(target)
    # lazily create the destination directory
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    with open(target, 'wb') as fh:
        pickle_dump(issue, fh)
def get_pullrequest_status(self, force_fetch=False):
    """Return the CI status payload for this pullrequest.

    The statuses url is taken from the raw PR data and the fetched
    payload is cached in a timestamped pickle; the cache is reused
    until the PR's updated_at moves past the cached timestamp, or
    force_fetch is True.
    """
    fetched = False
    jdata = None        # status payload returned to the caller
    pdata = None        # (timestamp, payload) tuple from/for the pickle

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self.github.get_request(surl)

            # a dict (instead of a list) signals an API error payload
            if isinstance(jdata, dict):
                # https://github.com/ansible/ansibullbot/issues/959
                logging.error(
                    u'Got the following error while fetching PR status: %s',
                    jdata.get(u'message'))
                logging.error(jdata)
                return []

            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self.github.get_request(surl)
        # FIXME? should we self.log_ci_status(jdata) here too?
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    return jdata
def save_issue(self, issue):
    """Write the issue object to its per-issue pickle file."""
    target = os.path.join(
        self.cachedir, u'issues',
        to_text(issue.number), u'issue.pickle'
    )
    target_dir = os.path.dirname(target)
    # lazily create the destination directory
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    logging.debug(u'dump %s' % target)
    with open(target, 'wb') as fh:
        pickle_dump(issue, fh)
def save_issue(self):
    """Pickle self.instance into the issue's cache directory."""
    cache_path = os.path.join(
        self.cachedir,
        u'issues',
        to_text(self.instance.number),
        u'issue.pickle'
    )
    # make sure the per-issue directory exists before writing
    parent = os.path.dirname(cache_path)
    if not os.path.isdir(parent):
        os.makedirs(parent)
    logging.debug(u'dump %s' % cache_path)
    with open(cache_path, 'wb') as fh:
        pickle_dump(self.instance, fh)
def _dump_cache(self):
    """Write the history plus the issue's updated_at timestamp to the
    pickle cache file."""
    if not os.path.isdir(self.cachedir):
        os.makedirs(self.cachedir)

    # keep the timestamp so later loads can detect staleness
    cachedata = {u'updated_at': self.issue.instance.updated_at,
                 u'history': self.history}

    try:
        with open(self.cachefile, 'wb') as f:
            pickle_dump(cachedata, f)
    except Exception as e:
        logging.error(e)
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb; epdb.st()
        else:
            # previously raised a blank Exception(u'') which hid the cause
            raise Exception(u'failed to dump %s: %s' % (self.cachefile, to_text(e)))
def _dump_cache(self):
    """Persist the events history and the issue's updated_at timestamp
    to the pickle cache file."""
    if not os.path.isdir(self.cachedir):
        os.makedirs(self.cachedir)

    # keep the timestamp so later loads can detect staleness
    cachedata = {u'updated_at': self.issue.instance.updated_at,
                 u'history': self.history}

    try:
        with open(self.cachefile, 'wb') as f:
            pickle_dump(cachedata, f)
    except Exception as e:
        logging.error(e)
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb; epdb.st()
        else:
            # previously raised a blank Exception(u'') which hid the cause
            raise Exception(u'failed to dump %s: %s' % (self.cachefile, to_text(e)))
def get_members(self, organization):
    """Get members of an organization

    Args:
        organization: name of the organization

    Returns:
        A list of GitHub login belonging to the organization
    """
    members = []

    update = False
    write_cache = False
    now = self.get_current_time()
    gh_org = self._connect().get_organization(organization)

    cachedir = os.path.join(self.cachedir_base, organization)
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
    cachefile = os.path.join(cachedir, 'members.pickle')

    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            mdata = pickle_load(f)
        members = mdata[1]
        if mdata[0] < gh_org.updated_at:
            # cache is stale: refetch AND persist the fresh copy.
            # (bug fix: write_cache previously stayed False here, so a
            # refreshed member list was never written back to disk and
            # every subsequent run refetched it again)
            update = True
            write_cache = True
    else:
        update = True
        write_cache = True

    if update:
        members = gh_org.get_members()
        members = [x.login for x in members]

    # save the data
    if write_cache:
        mdata = [now, members]
        with open(cachefile, 'wb') as f:
            pickle_dump(mdata, f)

    return members
def get_members(self, organization):
    """Get members of an organization

    Args:
        organization: name of the organization

    Returns:
        A list of GitHub login belonging to the organization
    """
    members = []

    update = False
    write_cache = False
    now = self.get_current_time()
    gh_org = self._connect().get_organization(organization)

    cachedir = os.path.join(self.cachedir_base, organization)
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
    cachefile = os.path.join(cachedir, 'members.pickle')

    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            mdata = pickle_load(f)
        members = mdata[1]
        if mdata[0] < gh_org.updated_at:
            # cache is stale: refetch AND persist the fresh copy.
            # (bug fix: write_cache previously stayed False here, so a
            # refreshed member list was never written back to disk and
            # every subsequent run refetched it again)
            update = True
            write_cache = True
    else:
        update = True
        write_cache = True

    if update:
        members = gh_org.get_members()
        members = [x.login for x in members]

    # save the data
    if write_cache:
        mdata = [now, members]
        with open(cachefile, 'wb') as f:
            pickle_dump(mdata, f)

    return members
def jobs(self):
    """Return the 'Job' records from the Azure Pipelines timeline.

    The timeline is fetched from TIMELINE_URL_FMT and cached as a
    (lastChangedOn, payload) pickle; when the remote timeline is gone
    the cached copy is used instead.  Also populates self._updated_at
    and self._stages as side effects.
    """
    if self._jobs is None:
        if self.build_id:
            if not os.path.isdir(self._cachedir):
                os.makedirs(self._cachedir)
            cache_file = os.path.join(
                self._cachedir, u'timeline_%s.pickle' % self.build_id)

            resp = fetch(TIMELINE_URL_FMT % self.build_id)
            if resp is None:
                data = None
                # fall back to the cached copy if the remote is gone
                if os.path.isfile(cache_file):
                    logging.info(
                        u'timeline was probably removed, load it from cache'
                    )
                    with open(cache_file, 'rb') as f:
                        data = pickle_load(f)
            else:
                data = resp.json()
                # store as (timestamp, payload) so staleness is detectable
                data = (strip_time_safely(data['lastChangedOn']), data)
                logging.info(u'writing %s' % cache_file)
                with open(cache_file, 'wb') as f:
                    pickle_dump(data, f)

            if data is not None:
                # unwrap the payload half of the (timestamp, payload) tuple
                data = data[1]
                self._jobs = [
                    r for r in data['records'] if r['type'] == 'Job'
                ]
                self._updated_at = strip_time_safely(
                    data['lastChangedOn'])  # FIXME
                self._stages = [
                    r for r in data['records'] if r['type'] == 'Stage'
                ]  # FIXME
            else:
                # no remote and no cache: act as an empty, ancient build
                self._jobs = []
                self._updated_at = strip_time_safely('1970-01-01')
                self._stages = []
        else:
            self._jobs = []
    return self._jobs
def get_artifact(self, name, url):
    """Download and cache an ansible-test artifact zip.

    The zip at *url* is fetched, every member whose filename contains
    'ansible-test-' is JSON-decoded, and the resulting list is cached
    as a (updated_at, artifact_data) pickle keyed by name and build id.
    Returns the cached/decoded artifact list, or None when nothing
    could be fetched or loaded.
    """
    if not os.path.isdir(self._cachedir):
        os.makedirs(self._cachedir)

    data = None
    cache_file = os.path.join(
        self._cachedir,
        u'%s_%s.pickle' % (name.replace(' ', '-'), self.build_id))
    if os.path.isfile(cache_file):
        logging.info(u'loading %s' % cache_file)
        with open(cache_file, 'rb') as f:
            data = pickle_load(f)

    # refetch when there is no cache, the cache is stale, or the cached
    # payload is empty
    if data is None or (data and data[0] < self.updated_at) or not data[1]:
        if data:
            logging.info(u'fetching artifacts: stale, previous from %s' % data[0])
        else:
            logging.info(u'fetching artifacts: stale, no previous data')
        resp = fetch(url, stream=True)
        if resp is not None:
            # NOTE: `data` is deliberately rebound to the BytesIO here,
            # shadowing the cache tuple; it is rebound again below
            with BytesIO() as data:
                for chunk in resp.iter_content(chunk_size=128):
                    data.write(chunk)
                artifact_zip = ZipFile(data)
                artifact_data = []
                for fn in artifact_zip.namelist():
                    # only the ansible-test result files are of interest
                    if 'ansible-test-' not in fn:
                        continue
                    with artifact_zip.open(fn) as f:
                        artifact_data.append(json.load(f))
            data = (self.updated_at, artifact_data)
            logging.info(u'writing %s' % cache_file)
            with open(cache_file, 'wb') as f:
                pickle_dump(data, f)

    if data:
        return data[1]
def artifacts(self):
    """Return the list of 'Bot'-prefixed artifacts for this build.

    Fetched from ARTIFACTS_URL_FMT and cached as a
    (updated_at, artifact_list) pickle keyed by build id; refetched
    when missing, stale or empty.
    """
    if self._artifacts is None:
        # FIXME deduplicate code
        if not os.path.isdir(self._cachedir):
            os.makedirs(self._cachedir)

        data = None
        cache_file = os.path.join(self._cachedir, u'artifacts_%s.pickle' % self.build_id)
        if os.path.isfile(cache_file):
            logging.info(u'load artifacts cache')
            with open(cache_file, 'rb') as f:
                data = pickle_load(f)

        # refetch when there is no cache, the cache is stale, or the
        # cached payload is empty
        if data is None or (data and data[0] < self.updated_at) or not data[1]:
            if data:
                logging.info(
                    u'fetching artifacts: stale, previous from %s' % data[0])
            else:
                logging.info(
                    u'fetching artifacts: stale, no previous data')
            resp = fetch(ARTIFACTS_URL_FMT % self.build_id)
            if resp is not None:
                # only bot-produced artifacts are of interest
                data = [
                    a for a in resp.json()['value']
                    if a['name'].startswith('Bot')
                ]
                data = (self.updated_at, data)
                logging.info(u'writing %s' % cache_file)
                with open(cache_file, 'wb') as f:
                    pickle_dump(data, f)

        if data:
            self._artifacts = data[1]

    return self._artifacts
def _get_module_blames(self):
    ''' Scrape the blame page for each module and store it

    Populates self.committers[k] (login -> commit hashes) per module,
    caches the blame as a (ghash, usernames, emailmap) pickle keyed by
    the module's last commit hash, and back-fills self.emails_cache and
    the commit 'login' fields from the collected data.
    '''

    keys = sorted(self.modules.keys())

    # scrape the data
    for k in keys:
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            # module file is gone from the checkout; no committers
            self.committers[k] = {}
            continue

        # cache key: blame is valid only for this exact commit
        ghash = self.last_commit_for_file(k)
        pfile = os.path.join(self.scraper_cache, k.replace(u'/', u'_') + u'.blame.pickle')
        sargs = [u'ansible', u'ansible', u'devel', k]

        refresh = False
        if not os.path.isfile(pfile):
            refresh = True
        else:
            logging.debug(u'load {}'.format(pfile))
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)
            # NOTE(review): this breakpoint fires on every cache load
            # when DEFAULT_BREAKPOINTS is set -- looks like leftover
            # debugging; confirm it is intentional
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            if pdata[0] == ghash:
                self.committers[k] = pdata[1]
                if len(pdata) == 3:
                    # use emailmap if available
                    emailmap = pdata[2]
                else:
                    # older cache format without the emailmap
                    emailmap = {}
            else:
                refresh = True

        if refresh:
            if self.gqlc:
                logging.debug(u'graphql blame usernames {}'.format(pfile))
                uns, emailmap = self.gqlc.get_usernames_from_filename_blame(
                    *sargs)
            else:
                emailmap = {}  # scrapping: emails not available
                logging.debug(u'www blame usernames {}'.format(pfile))
                uns = self.gws.get_usernames_from_filename_blame(*sargs)
            self.committers[k] = uns
            with open(pfile, 'wb') as f:
                pickle_dump((ghash, uns, emailmap), f)

        # merge this module's email->login mapping into the global cache,
        # never overwriting an existing entry
        for email, github_id in emailmap.items():
            if email not in self.emails_cache:
                self.emails_cache[email] = github_id

    # add scraped logins to the map
    for k in keys:
        for idx, x in enumerate(self.commits[k]):
            # u'@' is a sentinel for a malformed/empty email
            if x[u'email'] in [u'@']:
                continue
            if x[u'email'] not in self.emails_cache:
                self.emails_cache[x[u'email']] = None
            if x[u'login']:
                self.emails_cache[x[u'email']] = x[u'login']
                continue
            # no login on the commit: try to match its hash against the
            # blame data to recover the login
            xhash = x[u'hash']
            for ck, cv in six.iteritems(self.committers[k]):
                if xhash in cv:
                    self.emails_cache[x[u'email']] = ck
                    break

    # fill in what we can ...
    for k in keys:
        for idx, x in enumerate(self.commits[k]):
            if not x[u'login']:
                if x[u'email'] in [u'@']:
                    continue
                if self.emails_cache[x[u'email']]:
                    login = self.emails_cache[x[u'email']]
                    xhash = x[u'hash']
                    self.commits[k][idx][u'login'] = login
                    if login not in self.committers[k]:
                        self.committers[k][login] = []
                    if xhash not in self.committers[k][login]:
                        self.committers[k][login].append(xhash)
def load_update_fetch(self, property_name, obj=None, force=False):
    '''Fetch a property for an issue object

    Returns the (possibly cached) value of get_<property_name>() --
    or the plain attribute when no getter exists -- for the issue or
    its pullrequest; `obj` can force which of the two is used, and
    `force` bypasses the cache.
    '''

    # A pygithub issue object has methods such as ...
    #   - get_events()
    #   - get_comments()
    # Those methods return a list with no update() property,
    # so we can't take advantage of the caching scheme used
    # for the issue it's self. Instead this function calls
    # those methods by their given name, and write the data
    # to a pickle file with a timestamp for the fetch time.
    # Upon later loading of the pickle, the timestamp is
    # compared to the issue's update_at timestamp and if the
    # pickle data is behind, the process will be repeated.

    edata = None            # raw [timestamp, events] payload from the pickle
    events = []
    updated = None
    update = False          # do we need to re-fetch from the API?
    write_cache = False     # do we need to rewrite the pickle?

    pfile = os.path.join(self.full_cachedir, u'%s.pickle' % property_name)
    pdir = os.path.dirname(pfile)
    logging.debug(pfile)

    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            # unreadable cache -> force a re-fetch and a rewrite
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.instance.updated_at:
            update = True
            write_cache = True

    # decide whether the issue or the pullrequest owns this property
    baseobj = None
    if obj:
        if obj == u'issue':
            baseobj = self.instance
        elif obj == u'pullrequest':
            baseobj = self.pullrequest
    else:
        if hasattr(self.instance, u'get_' + property_name):
            baseobj = self.instance
        else:
            if self.pullrequest:
                if hasattr(self.pullrequest, u'get_' + property_name):
                    baseobj = self.pullrequest

    if not baseobj:
        logging.error(
            u'%s was not a property for the issue or the pullrequest'
            % property_name)
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb
            epdb.st()
        else:
            raise Exception(u'property error')

    # pull all events if timestamp is behind or no events cached
    if update or not events or force:
        write_cache = True
        updated = datetime.datetime.utcnow()

        if not hasattr(baseobj, u'get_' + property_name) \
                and hasattr(baseobj, property_name):
            # !callable properties
            try:
                methodToCall = getattr(baseobj, property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(to_text(e))
            events = methodToCall
        else:
            # callable properties
            try:
                methodToCall = getattr(baseobj, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(to_text(e))
            # drain the paginated iterator into a plain list
            events = [x for x in methodToCall()]

    if C.DEFAULT_PICKLE_ISSUES:
        if write_cache or not os.path.isfile(pfile) or force:
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

    return events
def save_repo(self):
    """Pickle the cached repo object to self.cachefile."""
    with open(self.cachefile, 'wb') as fh:
        pickle_dump(self.repo, fh)
def save_repo(self):
    """Serialize self.repo into the configured cache file."""
    with open(self.cachefile, 'wb') as cache_fh:
        pickle_dump(self.repo, cache_fh)
def get_pullrequest_status(self, force_fetch=False):
    """Return the CI status payload for this pullrequest.

    Statuses are fetched from the PR's statuses_url via
    self._fetch_api_url and cached in a timestamped pickle; the cache
    is reused until the PR's updated_at moves past the cached
    timestamp, or force_fetch is True.
    """

    def sort_unique_statuses(statuses):
        '''reduce redundant statuses to the final run for each id'''
        result = []
        groups = []
        thisgroup = []
        # group consecutive statuses by target_url ...
        for idx, x in enumerate(statuses):
            if not thisgroup:
                thisgroup.append(x)
                if idx == len(statuses) - 1:
                    groups.append(thisgroup)
                continue
            else:
                if thisgroup[-1][u'target_url'] == x[u'target_url']:
                    thisgroup.append(x)
                else:
                    groups.append(thisgroup)
                    thisgroup = []
                    thisgroup.append(x)

                if idx == len(statuses) - 1:
                    groups.append(thisgroup)

        # ... then keep only the most recently updated status per group
        for group in groups:
            group.sort(key=operator.itemgetter(u'updated_at'))
            result.append(group[-1])

        return result

    fetched = False
    jdata = None        # status payload returned to the caller
    pdata = None        # (timestamp, payload) tuple from/for the pickle

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(self.cachedir, u'issues', to_text(self.number), u'pr_status.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self._fetch_api_url(surl)
            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self._fetch_api_url(surl)
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    # remove intermediate duplicates
    #jdata = sort_unique_statuses(jdata)

    return jdata
def get_module_commits(self):
    """Populate self.commits[k] with parsed `git log --follow` data for
    every known module file.

    Results are cached as (mtime, commits) pickles keyed by the module
    path; the cache is invalidated when the file's mtime changes.
    """
    keys = self.modules.keys()
    keys = sorted(keys)
    for k in keys:
        self.commits[k] = []
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            continue

        # cache key: the checkout file's mtime
        mtime = os.path.getmtime(cpath)
        refresh = False
        pfile = os.path.join(
            self.scraper_cache,
            k.replace(u'/', u'_') + u'.commits.pickle'
        )

        if not os.path.isfile(pfile):
            refresh = True
        else:
            # py2-written pickles need bytes decoding under py3
            pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
            print(pfile)
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f, **pickle_kwargs)
            if pdata[0] == mtime:
                self.commits[k] = pdata[1]
            else:
                refresh = True

        if refresh:
            logging.info(u'refresh commit cache for %s' % k)
            cmd = u'cd %s; git log --follow %s' % (self.gitrepo.checkoutdir, k)
            (rc, so, se) = run_command(cmd)
            for line in to_text(so).split(u'\n'):
                if line.startswith(u'commit '):
                    # start of a new commit record
                    commit = {
                        u'name': None,
                        u'email': None,
                        u'login': None,
                        u'hash': line.split()[-1],
                        u'date': None
                    }

                # Author: Matt Clay <*****@*****.**>
                if line.startswith(u'Author: '):
                    line = line.replace(u'Author: ', u'')
                    line = line.replace(u'<', u'')
                    line = line.replace(u'>', u'')
                    lparts = line.split()

                    if u'@' in lparts[-1]:
                        commit[u'email'] = lparts[-1]
                        commit[u'name'] = u' '.join(lparts[:-1])
                    else:
                        pass

                    # noreply addresses encode the github login
                    if commit[u'email'] and \
                            u'noreply.github.com' in commit[u'email']:
                        commit[u'login'] = commit[u'email'].split(u'@')[0]

                # Date:   Sat Jan 28 23:28:53 2017 -0800
                if line.startswith(u'Date:'):
                    dstr = line.split(u':', 1)[1].strip()
                    # drop the trailing timezone offset
                    dstr = u' '.join(dstr.split(u' ')[:-1])
                    ds = datetime.datetime.strptime(
                        to_text(dstr),
                        u'%a %b %d %H:%M:%S %Y'
                    )
                    commit[u'date'] = ds
                    # the Date line closes the record
                    self.commits[k].append(commit)

            with open(pfile, 'wb') as f:
                pickle_dump((mtime, self.commits[k]), f)
def _get_module_blames(self):
    ''' Scrape the blame page for each module and store it

    Populates self.committers[k] (login -> commit hashes) per module,
    caches the blame as a (ghash, usernames, emailmap) pickle keyed by
    the module's last commit hash, and back-fills self.emails_cache and
    the commit 'login' fields from the collected data.
    '''

    keys = sorted(self.modules.keys())

    # scrape the data
    for k in keys:
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            # module file is gone from the checkout; no committers
            self.committers[k] = {}
            continue

        # cache key: blame is valid only for this exact commit
        ghash = self.last_commit_for_file(k)
        pfile = os.path.join(
            self.scraper_cache,
            k.replace(u'/', u'_') + u'.blame.pickle'
        )
        sargs = [u'ansible', u'ansible', u'devel', k]

        refresh = False
        if not os.path.isfile(pfile):
            refresh = True
        else:
            logging.debug(u'load {}'.format(pfile))
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)
            # NOTE(review): this breakpoint fires on every cache load
            # when DEFAULT_BREAKPOINTS is set -- looks like leftover
            # debugging; confirm it is intentional
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            if pdata[0] == ghash:
                self.committers[k] = pdata[1]
                if len(pdata) == 3:
                    # use emailmap if available
                    emailmap = pdata[2]
                else:
                    # older cache format without the emailmap
                    emailmap = {}
            else:
                refresh = True

        if refresh:
            if self.gqlc:
                logging.debug(u'graphql blame usernames {}'.format(pfile))
                uns, emailmap = self.gqlc.get_usernames_from_filename_blame(*sargs)
            else:
                emailmap = {}  # scrapping: emails not available
                logging.debug(u'www blame usernames {}'.format(pfile))
                uns = self.gws.get_usernames_from_filename_blame(*sargs)
            self.committers[k] = uns
            with open(pfile, 'wb') as f:
                pickle_dump((ghash, uns, emailmap), f)

        # merge this module's email->login mapping into the global cache,
        # never overwriting an existing entry
        for email, github_id in emailmap.items():
            if email not in self.emails_cache:
                self.emails_cache[email] = github_id

    # add scraped logins to the map
    for k in keys:
        for idx, x in enumerate(self.commits[k]):
            # u'@' is a sentinel for a malformed/empty email
            if x[u'email'] in [u'@']:
                continue
            if x[u'email'] not in self.emails_cache:
                self.emails_cache[x[u'email']] = None
            if x[u'login']:
                self.emails_cache[x[u'email']] = x[u'login']
                continue
            # no login on the commit: try to match its hash against the
            # blame data to recover the login
            xhash = x[u'hash']
            for ck, cv in six.iteritems(self.committers[k]):
                if xhash in cv:
                    self.emails_cache[x[u'email']] = ck
                    break

    # fill in what we can ...
    for k in keys:
        for idx, x in enumerate(self.commits[k]):
            if not x[u'login']:
                if x[u'email'] in [u'@']:
                    continue
                if self.emails_cache[x[u'email']]:
                    login = self.emails_cache[x[u'email']]
                    xhash = x[u'hash']
                    self.commits[k][idx][u'login'] = login
                    if login not in self.committers[k]:
                        self.committers[k][login] = []
                    if xhash not in self.committers[k][login]:
                        self.committers[k][login].append(xhash)
def load_update_fetch(self, property_name, obj=None):
    '''Fetch a property for an issue object

    Returns the (possibly cached) value of get_<property_name>() --
    or the plain attribute when no getter exists -- for the issue or
    its pullrequest; `obj` can force which of the two is used.
    '''

    # A pygithub issue object has methods such as ...
    #   - get_events()
    #   - get_comments()
    # Those methods return a list with no update() property,
    # so we can't take advantage of the caching scheme used
    # for the issue it's self. Instead this function calls
    # those methods by their given name, and write the data
    # to a pickle file with a timestamp for the fetch time.
    # Upon later loading of the pickle, the timestamp is
    # compared to the issue's update_at timestamp and if the
    # pickle data is behind, the process will be repeated.

    edata = None            # raw [timestamp, events] payload from the pickle
    events = []
    updated = None
    update = False          # do we need to re-fetch from the API?
    write_cache = False     # do we need to rewrite the pickle?

    pfile = os.path.join(
        self.cachedir,
        u'issues',
        to_text(self.instance.number),
        u'%s.pickle' % property_name
    )
    pdir = os.path.dirname(pfile)
    logging.debug(pfile)

    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                edata = pickle_load(f)
        except Exception as e:
            # unreadable cache -> force a re-fetch and a rewrite
            update = True
            write_cache = True

    # check the timestamp on the cache
    if edata:
        updated = edata[0]
        events = edata[1]
        if updated < self.instance.updated_at:
            update = True
            write_cache = True

    # decide whether the issue or the pullrequest owns this property
    baseobj = None
    if obj:
        if obj == u'issue':
            baseobj = self.instance
        elif obj == u'pullrequest':
            baseobj = self.pullrequest
    else:
        if hasattr(self.instance, u'get_' + property_name):
            baseobj = self.instance
        else:
            if self.pullrequest:
                if hasattr(self.pullrequest, u'get_' + property_name):
                    baseobj = self.pullrequest

    if not baseobj:
        logging.error(
            u'%s was not a property for the issue or the pullrequest'
            % property_name
        )
        if C.DEFAULT_BREAKPOINTS:
            logging.error(u'breakpoint!')
            import epdb; epdb.st()
        else:
            raise Exception(u'property error')

    # pull all events if timestamp is behind or no events cached
    if update or not events:
        write_cache = True
        updated = self.get_current_time()

        if not hasattr(baseobj, u'get_' + property_name) \
                and hasattr(baseobj, property_name):
            # !callable properties
            try:
                methodToCall = getattr(baseobj, property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                else:
                    raise Exception(to_text(e))
            events = methodToCall
        else:
            # callable properties
            try:
                methodToCall = getattr(baseobj, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                else:
                    raise Exception(to_text(e))
            # drain the paginated iterator into a plain list
            events = [x for x in methodToCall()]

    if write_cache or not os.path.isfile(pfile):
        # need to dump the pickle back to disk
        edata = [updated, events]
        with open(pfile, 'wb') as f:
            pickle_dump(edata, f)

    return events
def get_pullrequest_status(self, force_fetch=False):
    """Return the CI status payload for this pullrequest.

    Statuses are fetched from the PR's statuses_url via
    self.github.get_request and cached in a timestamped pickle; the
    cache is reused until the PR's updated_at moves past the cached
    timestamp, or force_fetch is True.  Returns [] on an API error
    payload.
    """

    def sort_unique_statuses(statuses):
        '''reduce redundant statuses to the final run for each id'''
        result = []
        groups = []
        thisgroup = []
        # group consecutive statuses by target_url ...
        for idx, x in enumerate(statuses):
            if not thisgroup:
                thisgroup.append(x)
                if idx == len(statuses) - 1:
                    groups.append(thisgroup)
                continue
            else:
                if thisgroup[-1][u'target_url'] == x[u'target_url']:
                    thisgroup.append(x)
                else:
                    groups.append(thisgroup)
                    thisgroup = []
                    thisgroup.append(x)

                if idx == len(statuses) - 1:
                    groups.append(thisgroup)

        # ... then keep only the most recently updated status per group
        for group in groups:
            group.sort(key=operator.itemgetter(u'updated_at'))
            result.append(group[-1])

        return result

    fetched = False
    jdata = None        # status payload returned to the caller
    pdata = None        # (timestamp, payload) tuple from/for the pickle

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self.github.get_request(surl)

            # a dict (instead of a list) signals an API error payload
            if isinstance(jdata, dict):
                # https://github.com/ansible/ansibullbot/issues/959
                logging.error(u'Got the following error while fetching PR status: %s', jdata.get(u'message'))
                logging.error(jdata)
                return []

            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self.github.get_request(surl)
        # FIXME? should we self.log_ci_status(jdata) here too?
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    # remove intermediate duplicates
    #jdata = sort_unique_statuses(jdata)

    return jdata
def get_module_commits(self):
    """Populate self.commits[k] with parsed `git log --follow` data for
    every known module file.

    Results are cached as (mtime, commits) pickles keyed by the module
    path; the cache is invalidated when the file's mtime changes.
    """
    keys = self.modules.keys()
    keys = sorted(keys)
    for k in keys:
        self.commits[k] = []
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            continue

        # cache key: the checkout file's mtime
        mtime = os.path.getmtime(cpath)
        refresh = False
        pfile = os.path.join(self.scraper_cache, k.replace(u'/', u'_') + u'.commits.pickle')

        if not os.path.isfile(pfile):
            refresh = True
        else:
            # py2-written pickles need bytes decoding under py3
            pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
            print(pfile)
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f, **pickle_kwargs)
            if pdata[0] == mtime:
                self.commits[k] = pdata[1]
            else:
                refresh = True

        if refresh:
            logging.info(u'refresh commit cache for %s' % k)
            cmd = u'cd %s; git log --follow %s' % (
                self.gitrepo.checkoutdir, k)
            (rc, so, se) = run_command(cmd)
            for line in to_text(so).split(u'\n'):
                if line.startswith(u'commit '):
                    # start of a new commit record
                    commit = {
                        u'name': None,
                        u'email': None,
                        u'login': None,
                        u'hash': line.split()[-1],
                        u'date': None
                    }

                # Author: Matt Clay <*****@*****.**>
                if line.startswith(u'Author: '):
                    line = line.replace(u'Author: ', u'')
                    line = line.replace(u'<', u'')
                    line = line.replace(u'>', u'')
                    lparts = line.split()

                    if u'@' in lparts[-1]:
                        commit[u'email'] = lparts[-1]
                        commit[u'name'] = u' '.join(lparts[:-1])
                    else:
                        pass

                    # noreply addresses encode the github login
                    if commit[u'email'] and \
                            u'noreply.github.com' in commit[u'email']:
                        commit[u'login'] = commit[u'email'].split(u'@')[0]

                # Date:   Sat Jan 28 23:28:53 2017 -0800
                if line.startswith(u'Date:'):
                    dstr = line.split(u':', 1)[1].strip()
                    # drop the trailing timezone offset
                    dstr = u' '.join(dstr.split(u' ')[:-1])
                    ds = datetime.datetime.strptime(
                        to_text(dstr), u'%a %b %d %H:%M:%S %Y')
                    commit[u'date'] = ds
                    # the Date line closes the record
                    self.commits[k].append(commit)

            with open(pfile, 'wb') as f:
                pickle_dump((mtime, self.commits[k]), f)
def get_pullrequest_status(self, force_fetch=False):
    """Return the CI status payload for this pullrequest.

    Statuses are fetched from the PR's statuses_url via
    self._fetch_api_url and cached in a timestamped pickle; the cache
    is reused until the PR's updated_at moves past the cached
    timestamp, or force_fetch is True.
    """

    def sort_unique_statuses(statuses):
        '''reduce redundant statuses to the final run for each id'''
        result = []
        groups = []
        thisgroup = []
        # group consecutive statuses by target_url ...
        for idx, x in enumerate(statuses):
            if not thisgroup:
                thisgroup.append(x)
                if idx == len(statuses) - 1:
                    groups.append(thisgroup)
                continue
            else:
                if thisgroup[-1][u'target_url'] == x[u'target_url']:
                    thisgroup.append(x)
                else:
                    groups.append(thisgroup)
                    thisgroup = []
                    thisgroup.append(x)

                if idx == len(statuses) - 1:
                    groups.append(thisgroup)

        # ... then keep only the most recently updated status per group
        for group in groups:
            group.sort(key=operator.itemgetter(u'updated_at'))
            result.append(group[-1])

        return result

    fetched = False
    jdata = None        # status payload returned to the caller
    pdata = None        # (timestamp, payload) tuple from/for the pickle

    # pull out the status url from the raw data
    rd = self.pullrequest_raw_data
    surl = rd[u'statuses_url']

    pfile = os.path.join(
        self.cachedir,
        u'issues',
        to_text(self.number),
        u'pr_status.pickle'
    )
    pdir = os.path.dirname(pfile)
    if not os.path.isdir(pdir):
        os.makedirs(pdir)

    if os.path.isfile(pfile):
        logging.info(u'pullrequest_status load pfile')
        with open(pfile, 'rb') as f:
            pdata = pickle_load(f)

    if pdata:
        # is the data stale?
        if pdata[0] < self.pullrequest.updated_at or force_fetch:
            logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
            jdata = self._fetch_api_url(surl)
            self.log_ci_status(jdata)
            fetched = True
        else:
            jdata = pdata[1]

    # missing?
    if not jdata:
        logging.info(u'fetching pr status: !data')
        jdata = self._fetch_api_url(surl)
        fetched = True

    if fetched or not os.path.isfile(pfile):
        logging.info(u'writing %s' % pfile)
        pdata = (self.pullrequest.updated_at, jdata)
        with open(pfile, 'wb') as f:
            pickle_dump(pdata, f)

    # remove intermediate duplicates
    #jdata = sort_unique_statuses(jdata)

    return jdata