def get_last_updated_date(self):
    """Return the starting date for the next crawl of this repository.

    Prefers the newest change date already stored in the database; when
    the database has none, falls back to the configured updated_since
    value, or to the current UTC time formatted as an ISO-8601 string.
    """
    latest = self.db.get_last_updated(self.repository_el_re)
    if latest:
        log.info("Most recent change date in the database for %s is %s" % (
            self.repository_el_re, latest["updated_at"]))
        return latest["updated_at"]
    return self.updated_since or utcnow().strftime(
        "%Y-%m-%dT%H:%M:%SZ")
def wait_for_call(self):
    """Throttle API calls against the remaining rate-limit quota.

    When fewer than ~150 calls remain, sleep until the quota reset time
    (plus a 60s safety margin) and refresh the rate-limit state;
    otherwise sleep a single second between calls.
    """
    if self.quota_remain <= 150:
        until_reset = self.resetat - utils.utcnow()
        # BUG FIX: timedelta.seconds is only the sub-day component and
        # wraps to ~86399 for a negative delta (resetat already in the
        # past), which caused a near-24h sleep.  Use total_seconds()
        # clamped at zero instead.
        delay = max(int(until_reset.total_seconds()), 0)
        self.log.info("Quota remain: %s/calls delay until "
                      "reset: %s/secs waiting ..." % (
                          self.quota_remain, delay))
        sleep(delay + 60)
        self.get_rate_limit()
    else:
        self.log.debug("Sleeping 1 sec to be a good citizen")
        sleep(1)
def ensure_gte_lte(es, index, repository_fullname, params):
    """Fill in any missing gte/lte bounds on *params*, in epoch millis.

    gte defaults to the timestamp of the first created event matching
    the query (or None when nothing matches); lte defaults to now.
    The dict is mutated in place.
    """
    if not params.get("gte"):
        earliest = _first_created_event(
            es, index, repository_fullname, params)
        if earliest:
            params["gte"] = int(is8601_to_dt(earliest).timestamp() * 1000)
        else:
            # Most likely nothing in the db matches the query
            params["gte"] = None
    if not params.get("lte"):
        params["lte"] = int(utcnow().timestamp() * 1000)
def _first_event_on_changes(es, index, repository_fullname, params):
    """Aggregate, per change, the delay between change creation and its
    first event.

    Scans matching events, groups them by change_id, and for each change
    records the creation date, the earliest event date/author and their
    delta.  Returns a dict with:
      first_event_delay_avg: average delay in seconds (int, 0 when no
        matching changes)
      top_authors: up to 10 (author_muid, count) tuples of first-event
        authors, most frequent first
    """
    params = deepcopy(params)

    def keyfunc(x):
        return x["change_id"]

    groups = {}
    _events = _scan(es, index, repository_fullname, params)
    # groupby only groups consecutive items: sort on the grouping key first
    _events = sorted(_events, key=keyfunc)
    # Keep by Change the created date + first event date
    for pr, events in groupby(_events, keyfunc):
        groups[pr] = {
            "change_created_at": None,
            "first_event_created_at": utcnow(),  # sentinel: later than any event
            "first_event_author": None,
            "delta": None,
        }
        for event in events:
            if not groups[pr]["change_created_at"]:
                groups[pr]["change_created_at"] = is8601_to_dt(
                    event["on_created_at"])
            event_created_at = is8601_to_dt(event["created_at"])
            if event_created_at < groups[pr]["first_event_created_at"]:
                groups[pr]["first_event_created_at"] = event_created_at
                groups[pr]["delta"] = (groups[pr]["first_event_created_at"]
                                       - groups[pr]["change_created_at"])
                groups[pr]["first_event_author"] = event["author"]["muid"]
    ret = {"first_event_delay_avg": 0, "top_authors": {}}
    for pr_data in groups.values():
        # BUG FIX: timedelta.seconds is only the sub-day component, so a
        # multi-day delay was undercounted; total_seconds() is the full
        # delay.
        ret["first_event_delay_avg"] += int(pr_data["delta"].total_seconds())
        ret["top_authors"].setdefault(pr_data["first_event_author"], 0)
        ret["top_authors"][pr_data["first_event_author"]] += 1
    try:
        ret["first_event_delay_avg"] = int(ret["first_event_delay_avg"] /
                                           len(groups))
    except ZeroDivisionError:
        ret["first_event_delay_avg"] = 0
    ret["top_authors"] = sorted(
        [(k, v) for k, v in ret["top_authors"].items()],
        key=lambda x: x[1],
        reverse=True,
    )[:10]
    return ret
def _first_event_on_changes(es, index, repository_fullname, params):
    """Aggregate, per change, the delay between change creation and its
    first event.

    NOTE(review): this redefines _first_event_on_changes declared earlier
    in the file (this later definition wins at import time); the earlier
    variant uses event["author"]["muid"] where this one uses
    event['author'] — confirm which is intended and drop the other.

    Returns a dict with:
      first_event_delay_avg: average delay in seconds (int, 0 when no
        matching changes)
      top_authors: up to 10 (author, count) tuples of first-event
        authors, most frequent first
    """
    params = deepcopy(params)

    def keyfunc(x):
        return x['change_id']

    groups = {}
    _events = _scan(es, index, repository_fullname, params)
    # groupby only groups consecutive items: sort on the grouping key first
    _events = sorted(_events, key=keyfunc)
    # Keep by Change the created date + first event date
    for pr, events in groupby(_events, keyfunc):
        groups[pr] = {
            'change_created_at': None,
            'first_event_created_at': utcnow(),  # sentinel: later than any event
            'first_event_author': None,
            'delta': None,
        }
        for event in events:
            if not groups[pr]['change_created_at']:
                groups[pr]['change_created_at'] = is8601_to_dt(
                    event['on_created_at'])
            event_created_at = is8601_to_dt(event['created_at'])
            if event_created_at < groups[pr]['first_event_created_at']:
                groups[pr]['first_event_created_at'] = event_created_at
                groups[pr]['delta'] = (
                    groups[pr]['first_event_created_at']
                    - groups[pr]['change_created_at']
                )
                groups[pr]['first_event_author'] = event['author']
    ret = {'first_event_delay_avg': 0, 'top_authors': {}}
    for pr_data in groups.values():
        # BUG FIX: timedelta.seconds is only the sub-day component, so a
        # multi-day delay was undercounted; total_seconds() is the full
        # delay.
        ret['first_event_delay_avg'] += int(pr_data['delta'].total_seconds())
        ret['top_authors'].setdefault(pr_data['first_event_author'], 0)
        ret['top_authors'][pr_data['first_event_author']] += 1
    try:
        ret['first_event_delay_avg'] = int(
            ret['first_event_delay_avg'] / len(groups))
    except ZeroDivisionError:
        ret['first_event_delay_avg'] = 0
    ret['top_authors'] = sorted(
        [(k, v) for k, v in ret['top_authors'].items()],
        key=lambda x: x[1],
        reverse=True,
    )[:10]
    return ret