def _changes_requested_by(user_reviews, shipits, last_commit, ready_for_review):
    outstanding = set()
    for actor, review in user_reviews.items():
        if review[u'state'] == u'CHANGES_REQUESTED':
            if actor in shipits:
                review_time = strip_time_safely(review[u'submitted_at'])
                review_time = pytz.utc.localize(review_time)
                shipit_time = shipits[actor]
                if review_time < shipit_time:
                    # ignore review older than shipit
                    # https://github.com/ansible/ansibullbot/issues/671
                    continue

            if ready_for_review:
                review_time = strip_time_safely(review[u'submitted_at'])
                review_time = pytz.utc.localize(review_time)
                if review[u'commit_id'] != last_commit and review_time < ready_for_review:
                    # ignore review older than the ready_for_review comment written
                    # by the submitter, but only if the pull request has been updated
                    # (meaning the last commit isn't the reviewed commit).
                    continue

            outstanding.add(actor)
        elif review[u'state'] not in [u'APPROVED', u'COMMENTED']:
            logging.error(u'%s unhandled' % review[u'state'])
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()

    return list(outstanding)
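# A minimal illustration (not part of the bot) of the timestamp pattern used
# above: strip_time_safely() yields a naive datetime, so it must be localized
# to UTC before it can be compared with the timezone-aware shipit and
# ready_for_review timestamps. The sample value is made up.
import datetime
import pytz

naive = datetime.datetime.strptime('2017-06-01T17:54:00Z', '%Y-%m-%dT%H:%M:%SZ')
aware = pytz.utc.localize(naive)
assert aware.tzinfo is not None  # comparing naive and aware datetimes raises TypeError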
def jobs(self):
    if not self.build_id:
        return []

    if self._jobs is None:
        self._jobs = []
        self._updated_at = strip_time_safely('1970-01-01')
        self._stages = []

        if not os.path.isdir(self._cachedir):
            os.makedirs(self._cachedir)
        cache_file = os.path.join(self._cachedir, u'timeline_%s.pickle' % self.build_id)

        url = TIMELINE_URL_FMT % self.build_id
        resp = fetch(url, timeout=TIMEOUT)
        if resp is None:
            raise Exception('Unable to GET %s' % url)

        if resp.status_code == 404:
            data = None
            if os.path.isfile(cache_file):
                logging.info(u'timeline was probably removed, load it from cache')
                with open(cache_file, 'rb') as f:
                    data = pickle.load(f)
        else:
            data = resp.json()
            data = (strip_time_safely(data['lastChangedOn']), data)
            logging.info(u'writing %s' % cache_file)
            with open(cache_file, 'wb') as f:
                pickle.dump(data, f)

        if data is not None:
            data = data[1]
            self._jobs = [r for r in data['records'] if r['type'] == 'Job']
            self._updated_at = strip_time_safely(data['lastChangedOn'])
            self._stages = [r for r in data['records'] if r['type'] == 'Stage']

            state = list({j['state'] for j in self.jobs})  # pending, completed, inProgress
            result = list({j['result'] for j in self.jobs})  # succeeded, failed, None

            if 'canceled' in result or 'cancelled' in result:
                self._state = 'failure'
            elif len(state) == 1 and 'completed' in state:
                if len(result) == 1 and 'succeeded' in result:
                    self._state = 'success'
                elif 'failed' in result:
                    self._state = 'failure'
            elif 'pending' in state or 'inProgress' in state:
                self._state = 'pending'
            else:
                raise ValueError(
                    'Unknown state for buildId: %s, state: %s' % (self.build_id, state))

    return self._jobs
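# For reference, a made-up sketch of the 2-tuple that jobs() pickles above so
# the 404 fallback can later reload it: (parsed lastChangedOn, raw timeline
# JSON). Field values are illustrative only.
import datetime

cached = (
    datetime.datetime(2020, 5, 1, 12, 0, 0),
    {
        'lastChangedOn': '2020-05-01T12:00:00.000Z',
        'records': [
            {'type': 'Job', 'state': 'completed', 'result': 'succeeded'},
            {'type': 'Stage', 'state': 'completed', 'result': 'succeeded'},
        ],
    },
)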
def _get_processed_run(self, status):
    run = status.copy()
    run_id = self._get_run_id_from_status(run)

    run['created_at'] = pytz.utc.localize(strip_time_safely(run.get('created_at')))
    run['updated_at'] = pytz.utc.localize(strip_time_safely(run.get('updated_at')))
    run['run_id'] = run_id

    return run
def get_processed_run(cls, run):
    run = run.copy()
    run_id = cls._get_run_id_from_status(run)

    run[u'created_at'] = pytz.utc.localize(strip_time_safely(run.get(u'created_at')))
    run[u'updated_at'] = pytz.utc.localize(strip_time_safely(run.get(u'updated_at')))
    run[u'run_id'] = run_id

    return run
def __init__(self, cachedir, iw):
    self._cachedir = os.path.join(cachedir, 'azp.runs')
    self._iw = iw
    self._build_id = None
    self._jobs = None
    self._state = None
    self._updated_at = None
    self._stages = None
    self._artifacts = None

    self.last_run = None
    self.created_at = None
    try:
        self.created_at = min(
            strip_time_safely(j['startTime'])
            for j in self.jobs
            if j['startTime'] is not None
        )
    except ValueError:
        self.created_at = self.updated_at

    if self.state and self.build_id and self.jobs:
        self.last_run = {
            'state': self.state,
            'created_at': pytz.utc.localize(self.created_at),
            'updated_at': pytz.utc.localize(self.updated_at),
            'run_id': self.build_id,
        }
def merge_reviews(self, reviews):
    for review in reviews:
        event = {}

        # https://github.com/ansible/ansibullbot/issues/1207
        # "ghost" users are deleted users and show up as NoneType
        if review.get('user') is None:
            continue

        if review[u'state'] == u'COMMENTED':
            event[u'event'] = u'review_comment'
        elif review[u'state'] == u'CHANGES_REQUESTED':
            event[u'event'] = u'review_changes_requested'
        elif review[u'state'] == u'APPROVED':
            event[u'event'] = u'review_approved'
        elif review[u'state'] == u'DISMISSED':
            event[u'event'] = u'review_dismissed'
        elif review[u'state'] == u'PENDING':
            # ignore pending reviews
            continue
        else:
            logging.error(u'unknown review state %s', review[u'state'])
            continue

        event[u'id'] = review[u'id']
        event[u'actor'] = review[u'user'][u'login']
        event[u'created_at'] = pytz.utc.localize(strip_time_safely(review[u'submitted_at']))
        if u'commit_id' in review:
            event[u'commit_id'] = review[u'commit_id']
        else:
            event[u'commit_id'] = None
        event[u'body'] = review.get(u'body')

        self.history.append(event)

    self.history = sorted(self.history, key=itemgetter(u'created_at'))
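# An illustrative GitHub review payload of the shape merge_reviews() expects;
# the values are invented, but the keys match what the function reads.
review = {
    u'id': 12345678,
    u'state': u'APPROVED',
    u'user': {u'login': u'someuser'},
    u'submitted_at': u'2017-06-01T17:54:00Z',
    u'commit_id': u'0123456789abcdef0123456789abcdef01234567',
    u'body': u'LGTM',
}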
def _get_cached_url(self, url, days=0):
    cachedir = os.path.join(self.cachedir, 'urls')
    if not os.path.exists(cachedir):
        os.makedirs(cachedir)
    cachefile = os.path.join(cachedir, url.replace('/', '__'))

    if os.path.exists(cachefile):
        with open(cachefile) as f:
            fdata = json.loads(f.read())
        jdata = fdata['result']
        ts = fdata['timestamp']
        now = datetime.datetime.now()
        ts = strip_time_safely(ts)
        if (now - ts).days <= days:
            return jdata

    rr = requests.get(url)
    jdata = rr.json()
    with open(cachefile, 'w') as f:
        f.write(json.dumps({
            'timestamp': datetime.datetime.now().isoformat(),
            'result': jdata,
        }))

    return jdata
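# Assumed cache layout for _get_cached_url(): the filename is the URL with
# slashes replaced by double underscores, and the payload is a flat JSON
# object. The URL here is hypothetical.
url = 'https://api.example.com/runs/123'
cachefile_name = url.replace('/', '__')  # 'https:____api.example.com__runs__123'
cache_payload = {
    'timestamp': '2017-06-01T17:54:00',  # datetime.isoformat() at write time
    'result': {},                        # whatever rr.json() returned
}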
def status_to_date_and_runid(status, keepstate=False):
    """convert pr status to a tuple of date and runid"""
    # https://github.com/ansible/ansibullbot/issues/934
    if not status.get(u'context', u'') == u'Shippable':
        return None

    created_at = status.get(u'created_at')
    target = status.get(u'target_url')
    if target.endswith(u'/summary'):
        target = target.split(u'/')[-2]
    else:
        target = target.split(u'/')[-1]

    try:
        int(target)
    except ValueError:
        # strip the new id out of the description
        runid = status[u'description']
        runid = runid.split()[1]
        if runid.isdigit():
            target = runid

    ts = pytz.utc.localize(strip_time_safely(created_at))

    if keepstate:
        return ts, target, status[u'state']
    else:
        return ts, target
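# Hedged usage sketch for status_to_date_and_runid(); this status dict is made
# up, but its shape follows what the function reads: the run id is taken from
# the target_url, falling back to the second word of the description when the
# URL tail is not numeric.
status = {
    u'context': u'Shippable',
    u'created_at': u'2017-06-01T17:54:00Z',
    u'target_url': u'https://app.shippable.com/github/ansible/ansible/runs/41758/summary',
    u'state': u'failure',
    u'description': u'Run 41758 status is FAILED.',  # invented description text
}
ts, runid = status_to_date_and_runid(status)  # (aware UTC datetime, u'41758')
ts, runid, state = status_to_date_and_runid(status, keepstate=True)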
def get_last_full_run_date(self):
    '''Map partial re-runs back to their last full run date'''
    # https://github.com/ansible/ansibullbot/issues/935

    # extract and unique the run ids from the target urls
    if not self.states:
        raise NoCIError(u'No shippable states')

    runids = [self._get_run_id_from_status(x) for x in self.states]

    # get rid of duplicates and sort
    runids = sorted(set(runids))

    # always use the numerically higher run id
    runid = runids[-1]

    # build a datastructure to hold the info collected
    rundata = {
        u'runid': runid,
        u'created_at': None,
        u'rerun_batch_id': None,
        u'rerun_batch_createdat': None,
    }

    # query the api for all data on this runid
    try:
        rdata = self._get_run_data(to_text(runid), usecache=True)
    except ShippableNoData:
        return None

    # whoops ...
    if rdata is None:
        return None

    # get the referenced run for the last runid if it exists
    pbag = rdata.get(u'propertyBag')
    if pbag:
        rundata[u'rerun_batch_id'] = pbag.get(u'originalRunId')

    # keep the timestamp too
    rundata[u'created_at'] = rdata.get(u'createdAt')

    # if it had a rerun_batch_id it was a partial run and
    # we need to go get the date on the original run
    while rundata[u'rerun_batch_id']:
        # the original run data
        rjdata = self._get_run_data(rundata[u'rerun_batch_id'])

        # swap the timestamp
        rundata[u'rerun_batch_createdat'] = rundata[u'created_at']

        # get the old timestamp
        rundata[u'created_at'] = rjdata.get(u'createdAt')

        # get the new batchid
        pbag = rjdata.get(u'propertyBag')
        if pbag:
            rundata[u'rerun_batch_id'] = pbag.get(u'originalRunId')
        else:
            rundata[u'rerun_batch_id'] = None

    # return only the timestamp from the last full run
    return strip_time_safely(rundata[u'created_at'])
def _load_checkout_index(self):
    ci = {}
    if os.path.exists(self._checkout_index_file):
        with open(self._checkout_index_file, 'r') as f:
            ci = json.loads(f.read())
    for k, v in ci.items():
        ci[k]['updated'] = strip_time_safely(v['updated'])
    self._checkout_index = copy.deepcopy(ci)
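# Assumed shape of the checkout index file read above; the only field that
# matters to _load_checkout_index() is the ISO 'updated' stamp, which gets
# converted back into a datetime. Keys and values are invented.
example_checkout_index = {
    "https://github.com/ansible/ansible": {
        "updated": "2017-06-01T17:54:00Z"
    }
}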
def test_strip_four(self):
    ts = '2017-06-01T17:54:00ZDSFSDFDFSDFS'
    e = None
    try:
        strip_time_safely(ts)
    except Exception as exc:
        # rebind to an outer name: on Python 3 the name bound by
        # "except ... as e" is deleted when the except block exits, so
        # catching directly as "e" would make the assert raise NameError
        e = exc
    assert e is not None
def jobs(self):
    if self._jobs is None:
        if self.build_id:
            if not os.path.isdir(self._cachedir):
                os.makedirs(self._cachedir)
            cache_file = os.path.join(self._cachedir, u'timeline_%s.pickle' % self.build_id)

            url = TIMELINE_URL_FMT % self.build_id
            resp = fetch(url)
            if resp is None:
                raise Exception("Unable to GET %s" % url)

            if resp.status_code == 404:
                data = None
                if os.path.isfile(cache_file):
                    logging.info(u'timeline was probably removed, load it from cache')
                    with open(cache_file, 'rb') as f:
                        data = pickle.load(f)
            else:
                data = resp.json()
                data = (strip_time_safely(data['lastChangedOn']), data)
                logging.info(u'writing %s' % cache_file)
                with open(cache_file, 'wb') as f:
                    pickle.dump(data, f)

            if data is not None:
                data = data[1]
                self._jobs = [r for r in data['records'] if r['type'] == 'Job']
                self._updated_at = strip_time_safely(data['lastChangedOn'])  # FIXME
                self._stages = [r for r in data['records'] if r['type'] == 'Stage']  # FIXME
            else:
                self._jobs = []
                self._updated_at = strip_time_safely('1970-01-01')
                self._stages = []
        else:
            self._jobs = []

    return self._jobs
def is_stale(self, states):
    ci_date = self._get_last_shippable_full_run_date(states)

    # https://github.com/ansible/ansibullbot/issues/458
    if ci_date:
        ci_date = strip_time_safely(ci_date)
        ci_delta = (datetime.datetime.now() - ci_date).days
        return ci_delta > 7

    return False
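# The staleness math above compares two naive datetimes, which is valid
# because strip_time_safely() returns a naive datetime and
# datetime.datetime.now() is naive as well. A self-contained example with a
# made-up CI date:
import datetime

ci_date = datetime.datetime(2017, 6, 1, 17, 54)
is_stale = (datetime.datetime.now() - ci_date).days > 7  # True once a week has passed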
def join_history(self):
    this_history = [x for x in self.iw.history.history]

    status = {}
    for x in self.ci_status:
        # target_url could be:
        # https://app.shippable.com/github/ansible/ansible/runs/41758/summary
        # https://app.shippable.com/github/ansible/ansible/runs/41758
        turl = x[u'target_url']
        if turl.endswith(u'/summary'):
            turl = turl[:-8]
        run_id = turl.split(u'/')[-1]
        if run_id == u'zuul.openstack.org':
            continue

        if run_id in status:
            rd = status[run_id]
        else:
            rd = self.shippable.get_run_data(run_id, usecache=True)
            status[run_id] = rd

        # sometimes the target urls are invalid
        # https://app.shippable.com/runs/58cc4fe537380a0800e4284c
        # https://app.shippable.com/github/ansible/ansible/runs/16628
        if not rd:
            continue

        ts = pytz.utc.localize(strip_time_safely(x[u'updated_at']))

        this_history.append({
            u'actor': rd.get(u'triggeredBy', {}).get(u'login'),
            u'event': u'ci_run',
            u'created_at': ts,
            u'state': x[u'state'],
            u'run_id': run_id,
            u'status_id': x[u'id'],
            u'sha': rd[u'commitSha'],
        })

    this_history = sorted(this_history, key=lambda k: k[u'created_at'])
    self.history = this_history
def update(self):
    success = False
    while not success:
        resp = requests.get(ANSIBLE_RUNS_URL)
        try:
            self._rawdata = resp.json()
            success = True
        except Exception as e:
            logging.error(e)
            time.sleep(2 * 60)

    self.runs = [x for x in self._rawdata]
    for idx, x in enumerate(self.runs):
        for k, v in six.iteritems(x):
            if k.endswith(u'At'):
                # 2017-02-07T00:27:06.482Z
                if v:
                    self.runs[idx][k] = strip_time_safely(v)
def update(self):
    '''Fetch the latest data then send for processing'''
    success = False
    while not success:
        resp = requests.get(self.url)
        try:
            self._rawdata = resp.json()
            success = True
        except Exception as e:
            logging.error(e)
            time.sleep(2 * 60)

    # Fix data
    self.runs = [x for x in self._rawdata]
    for idx, x in enumerate(self.runs):
        for k, v in six.iteritems(x):
            if k.endswith(u'At'):
                # 2017-02-07T00:27:06.482Z
                if v:
                    self.runs[idx][k] = strip_time_safely(v)
def version_by_date(self, dateobj, devel=False):
    if not self.DATEVERSIONS:
        self.DATEVERSIONS = []
        cmd = u'cd %s;' % self.checkoutdir
        cmd += u'git log --date=short --pretty=format:"%ad;%H"'
        (rc, so, se) = run_command(cmd)
        lines = (x.strip() for x in to_text(so).split(u'\n'))
        lines = filter(bool, lines)
        for x in lines:
            parts = x.split(u';')
            self.DATEVERSIONS.append(parts)

    last_commit_date = self.DATEVERSIONS[0][0]
    last_commit_date = strip_time_safely(last_commit_date)

    # use last commit version if older than incoming date
    if dateobj >= last_commit_date:
        acommit = self.DATEVERSIONS[0][1]
    else:
        acommit = None
        datestr = to_text(dateobj).split()[0]
        for dv in reversed(self.DATEVERSIONS):
            if dv[0] == datestr:
                break
        if not acommit:
            datestr = u'-'.join(datestr.split(u'-')[0:2])
            for dv in self.DATEVERSIONS:
                dvs = u'-'.join(dv[0].split(u'-')[0:2])
                if dvs == datestr:
                    acommit = dv[1]
                    break

    aversion = None
    if acommit:
        aversion = self.ansible_version_by_commit(acommit)

    return aversion
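# The DATEVERSIONS cache above is built from `git log --date=short
# --pretty=format:"%ad;%H"` output, one "date;hash" pair per line, newest
# first. A made-up sample line and how it splits:
line = u'2017-01-28;0123456789abcdef0123456789abcdef01234567'
commit_date, commit_hash = line.split(u';')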
def get_stale_numbers(self, reponame):
    stale = []
    for number, summary in self.issue_summaries[reponame].items():
        if number in stale:
            continue

        if summary['state'] == 'closed':
            continue

        number = int(number)
        mfile = os.path.join(self.cachedir_base, reponame, 'issues',
                             to_text(number), 'meta.json')

        if not os.path.isfile(mfile):
            stale.append(number)
            continue

        try:
            with open(mfile, 'rb') as f:
                meta = json.load(f)
        except ValueError as e:
            logging.error('failed to parse %s: %s' % (to_text(mfile), to_text(e)))
            os.remove(mfile)
            stale.append(number)
            continue

        delta = (datetime.datetime.now() - strip_time_safely(meta['time'])).days
        if delta > C.DEFAULT_STALE_WINDOW:
            stale.append(number)

    stale = sorted({int(x) for x in stale})
    if 10 >= len(stale) > 0:
        logging.info('stale: %s' % ','.join([to_text(x) for x in stale]))

    return stale
def _parse_events(self, events):
    processed_events = []
    for event_no, dd in enumerate(events):
        if dd[u'event'] == u'committed':
            # FIXME
            # commits are added through HistoryWrapper.merge_commits()
            continue

        # reviews do not have created_at keys
        if not dd.get(u'created_at') and dd.get(u'submitted_at'):
            dd[u'created_at'] = dd[u'submitted_at']

        # commits do not have created_at keys
        if not dd.get(u'created_at') and dd.get('author'):
            dd[u'created_at'] = dd[u'author'][u'date']

        # commit comments do not have created_at keys
        if not dd.get(u'created_at') and dd.get('comments'):
            dd[u'created_at'] = dd[u'comments'][0][u'created_at']

        if not dd.get(u'created_at'):
            raise AssertionError(dd)

        # commits do not have actors
        if not dd.get(u'actor'):
            dd[u'actor'] = {'login': None}

        # fix commits with no message
        if dd[u'event'] == u'committed' and u'message' not in dd:
            dd[u'message'] = u''

        if not dd.get(u'id'):
            # set the id as the graphql node_id OR make one up
            if u'node_id' in dd:
                dd[u'id'] = dd[u'node_id']
            else:
                dd[u'id'] = '%s/%s/%s/%s' % (
                    self.repo_full_name, self.number, 'timeline', event_no)

        event = {}
        event[u'id'] = dd[u'id']
        event[u'actor'] = dd[u'actor'][u'login']
        event[u'event'] = dd[u'event']
        if isinstance(dd[u'created_at'], six.string_types):
            dd[u'created_at'] = strip_time_safely(dd[u'created_at'])
        event[u'created_at'] = pytz.utc.localize(dd[u'created_at'])

        if dd[u'event'] in [u'labeled', u'unlabeled']:
            event[u'label'] = dd.get(u'label', {}).get(u'name', None)
        elif dd[u'event'] == u'referenced':
            event[u'commit_id'] = dd[u'commit_id']
        elif dd[u'event'] == u'assigned':
            event[u'assignee'] = dd[u'assignee'][u'login']
            event[u'assigner'] = event[u'actor']
        elif dd[u'event'] == u'commented':
            event[u'body'] = dd[u'body']
        elif dd[u'event'] == u'cross-referenced':
            event[u'source'] = dd[u'source']

        processed_events.append(event)

    return sorted(processed_events, key=lambda x: x[u'created_at'])
def test_strip_three(self):
    ts = u'2017-06-01T17:54:00Z'
    to = strip_time_safely(ts)
    assert to.year == 2017
    assert to.month == 6
    assert to.day == 1
def test_strip_one(self):
    ts = '2017-06-01T17:54:00Z'
    to = strip_time_safely(ts)
    assert to.year == 2017
    assert to.month == 6
    assert to.day == 1
def _collect_repo(self, repo, issuenums=None):
    '''Collect issues for an individual repo'''
    logging.info('getting repo obj for %s' % repo)

    if repo not in self.repos:
        gitrepo = GitRepoWrapper(
            cachedir=self.cachedir_base,
            repo=f'https://github.com/{repo}',
            commit=self.args.ansible_commit,
        )
        self.repos[repo] = {
            'repo': self.ghw.get_repo(repo),
            'issues': [],
            'processed': [],
            'since': None,
            'stale': [],
            'loopcount': 0,
            'labels': self.ghw.get_valid_labels(repo),
            'gitrepo': gitrepo,
        }
    else:
        # force a clean repo object to limit caching problems
        logging.info('updating repo')
        self.repos[repo]['repo'] = self.ghw.get_repo(repo)
        logging.info('updating checkout')
        self.repos[repo]['gitrepo'].update()
        # clear the issues
        self.repos[repo]['issues'] = {}
        # increment the loopcount
        self.repos[repo]['loopcount'] += 1

    logging.info('getting issue objs for %s' % repo)
    self.update_issue_summaries(repopath=repo, issuenums=issuenums)

    issuecache = {}
    numbers = self.issue_summaries[repo].keys()
    numbers = {int(x) for x in numbers}
    if issuenums:
        numbers.intersection_update(issuenums)
    numbers = list(numbers)
    logging.info('%s known numbers' % len(numbers))

    if self.args.daemonize:
        if not self.repos[repo]['since']:
            ts = [
                x[1]['updated_at']
                for x in self.issue_summaries[repo].items()
                if x[1]['updated_at']
            ]
            ts += [
                x[1]['created_at']
                for x in self.issue_summaries[repo].items()
                if x[1]['created_at']
            ]
            ts = sorted(set(ts))
            if ts:
                self.repos[repo]['since'] = ts[-1]
        else:
            since = strip_time_safely(self.repos[repo]['since'])
            api_since = self.repos[repo]['repo'].get_issues(since=since)

            numbers = []
            for x in api_since:
                numbers.append(x.number)
                issuecache[x.number] = x
            numbers = sorted({int(n) for n in numbers})
            logging.info('%s numbers after [api] since == %s' % (len(numbers), since))

            for k, v in self.issue_summaries[repo].items():
                if v['created_at'] is None:
                    # issue is closed and was never processed
                    continue
                if v['created_at'] > self.repos[repo]['since']:
                    numbers.append(k)
            numbers = sorted({int(n) for n in numbers})
            logging.info('%s numbers after [www] since == %s' % (len(numbers), since))

    if self.args.start_at and self.repos[repo]['loopcount'] == 0:
        numbers = [x for x in numbers if x <= self.args.start_at]
        logging.info('%s numbers after start-at' % len(numbers))

    # Get stale numbers if not targeting
    if self.args.daemonize and self.repos[repo]['loopcount'] > 0:
        logging.info('checking for stale numbers')
        stale = self.get_stale_numbers(repo)
        self.repos[repo]['stale'] = [int(x) for x in stale]
        numbers += [int(x) for x in stale]
        numbers = sorted(set(numbers))
        logging.info('%s numbers after stale check' % len(numbers))

    ################################################################
    # PRE-FILTERING TO PREVENT EXCESSIVE API CALLS
    ################################################################

    # filter just the open numbers
    if not self.args.only_closed and not self.args.ignore_state:
        numbers = [
            x for x in numbers
            if (to_text(x) in self.issue_summaries[repo] and
                self.issue_summaries[repo][to_text(x)]['state'] == 'open')
        ]
        logging.info('%s numbers after checking state' % len(numbers))

    # filter by type
    if self.args.only_issues:
        numbers = [
            x for x in numbers
            if self.issue_summaries[repo][to_text(x)]['type'] == 'issue'
        ]
        logging.info('%s numbers after checking type' % len(numbers))
    elif self.args.only_prs:
        numbers = [
            x for x in numbers
            if self.issue_summaries[repo][to_text(x)]['type'] == 'pullrequest'
        ]
        logging.info('%s numbers after checking type' % len(numbers))

    numbers = sorted({int(x) for x in numbers})
    if self.args.sort == 'desc':
        numbers = [x for x in reversed(numbers)]

    if self.args.last and len(numbers) > self.args.last:
        numbers = numbers[0 - self.args.last:]

    # Use an iterator to avoid requesting all issues upfront
    self.repos[repo]['issues'] = RepoIssuesIterator(
        self.repos[repo]['repo'], numbers, issuecache=issuecache)

    logging.info('getting repo objs for %s complete' % repo)
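# A minimal sketch of what the RepoIssuesIterator used above might look like,
# assuming it serves issues from the prefetched issuecache first and only
# hits the API (one call per number) when iterated; this is an illustration,
# not the real class.
class RepoIssuesIterator:
    def __init__(self, repo, numbers, issuecache=None):
        self.repo = repo
        self.numbers = numbers
        self.issuecache = issuecache or {}

    def __iter__(self):
        for number in self.numbers:
            if number in self.issuecache:
                yield self.issuecache[number]
            else:
                # lazy fetch keeps API usage proportional to consumption
                yield self.repo.get_issue(number)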
def get_shippable_run_facts(iw, meta, shippable):
    '''Does an issue need the test result comment?'''
    # https://github.com/ansible/ansibullbot/issues/312
    # https://github.com/ansible/ansibullbot/issues/404
    # https://github.com/ansible/ansibullbot/issues/418

    # should only be here if the run state is failed ...
    if not meta[u'has_shippable'] or meta[u'ci_state'] != u'failure':
        return {
            u'shippable_test_results': None,
            u'ci_verified': None,
            u'needs_testresult_notification': None,
        }

    needs_testresult_notification = False

    last_run = shippable.get_processed_last_run(iw.pullrequest_status)
    last_run_id = last_run[u'run_id']

    # filter by the last run id
    # FIXME this needs to be split into two methods
    shippable_test_results, ci_verified = \
        shippable.get_test_results(
            last_run_id,
            usecache=True,
            filter_paths=[u'/testresults/ansible-test-.*.json'],
        )

    # do validation so that we're not stepping on toes
    if u'ci_verified' in iw.labels and not ci_verified:
        ci_verified_last_applied = iw.history.label_last_applied(u'ci_verified')

        for ci_run in shippable.get_states(iw.pullrequest_status):
            ci_run_updated_at = pytz.utc.localize(strip_time_safely(ci_run[u'updated_at']))
            if ci_run_updated_at <= ci_verified_last_applied:
                last_ci_verified_run = shippable.get_processed_run(ci_run)
                if last_run_id == last_ci_verified_run[u'run_id']:
                    ci_verified = True
                    break

    # no results means no notification required
    if len(shippable_test_results) < 1:
        needs_testresult_notification = False
    else:
        s_bpcs = iw.history.get_boilerplate_comments_content(
            bfilter='shippable_test_result')

        if s_bpcs:
            # was this specific result shown?
            job_ids = [x[u'job_id'] for x in shippable_test_results]
            job_ids = sorted(set(job_ids))
            found = []
            for bp in s_bpcs:
                for job_id in [x for x in job_ids if x not in found]:
                    if job_id in bp and job_id not in found:
                        found.append(job_id)
            if len(found) == len(job_ids):
                needs_testresult_notification = False
            else:
                needs_testresult_notification = True
        else:
            needs_testresult_notification = True

    return {
        u'shippable_test_results': shippable_test_results,
        u'ci_verified': ci_verified,
        u'needs_testresult_notification': needs_testresult_notification,
    }
def test_strip_four(self):
    ts = u'2017-06-01T17:54:00ZDSFSDFDFSDFS'
    with pytest.raises(Exception):
        strip_time_safely(ts)
def test_strip_four(self):
    ts = '2017-06-01T17:54:00ZDSFSDFDFSDFS'
    with pytest.raises(Exception):
        strip_time_safely(ts)
def get_module_commits(self):
    keys = self.modules.keys()
    keys = sorted(keys)
    for k in keys:
        self.commits[k] = []
        cpath = os.path.join(self.gitrepo.checkoutdir, k)
        if not os.path.isfile(cpath):
            continue

        mtime = os.path.getmtime(cpath)
        refresh = False
        pfile = os.path.join(
            self.scraper_cache,
            k.replace('/', '_') + '.commits.pickle'
        )

        if not os.path.isfile(pfile):
            refresh = True
        else:
            print(pfile)
            with open(pfile, 'rb') as f:
                pdata = pickle.load(f)
            if pdata[0] == mtime:
                self.commits[k] = pdata[1]
            else:
                refresh = True

        if refresh:
            logging.info('refresh commit cache for %s' % k)
            cmd = 'cd %s; git log --follow %s' % (self.gitrepo.checkoutdir, k)
            (rc, so, se) = run_command(cmd)
            for line in to_text(so).split('\n'):
                if line.startswith('commit '):
                    commit = {
                        'name': None,
                        'email': None,
                        'login': None,
                        'hash': line.split()[-1],
                        'date': None,
                    }

                # Author: Matt Clay <*****@*****.**>
                if line.startswith('Author: '):
                    line = line.replace('Author: ', '')
                    line = line.replace('<', '')
                    line = line.replace('>', '')
                    lparts = line.split()

                    if '@' in lparts[-1]:
                        commit['email'] = lparts[-1]
                        commit['name'] = ' '.join(lparts[:-1])

                    if commit['email'] and \
                            'noreply.github.com' in commit['email']:
                        commit['login'] = commit['email'].split('@')[0]

                # Date: Sat Jan 28 23:28:53 2017 -0800
                if line.startswith('Date:'):
                    dstr = line.split(':', 1)[1].strip()
                    dstr = ' '.join(dstr.split(' ')[:-1])
                    commit['date'] = strip_time_safely(to_text(dstr))
                    self.commits[k].append(commit)

            with open(pfile, 'wb') as f:
                pickle.dump((mtime, self.commits[k]), f)
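# Sample `git log` block of the shape parsed above (values invented):
#
#   commit 0123456789abcdef0123456789abcdef01234567
#   Author: Jane Doe <jane@example.com>
#   Date:   Sat Jan 28 23:28:53 2017 -0800
#
# The Date handler drops the trailing UTC offset before calling
# strip_time_safely():
dstr = 'Sat Jan 28 23:28:53 2017 -0800'
dstr = ' '.join(dstr.split(' ')[:-1])  # 'Sat Jan 28 23:28:53 2017'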