def _daemon(self, please_stop):
    """
    Background worker: pop (branch, revisions) work items off self.todo and
    scan each revision so it gets cached/indexed.

    :param please_stop: stop signal (mo-threads Signal); checked between items
    """
    while not please_stop:
        with Explanation("looking for work"):
            try:
                branch, revisions = self.todo.pop(till=please_stop)
            except Exception:
                if please_stop:
                    # queue pop was interrupted by shutdown; exit quietly
                    break
                else:
                    raise  # bare raise preserves the original traceback (raise e would truncate it)
            if branch.name in DAEMON_DO_NO_SCAN:
                continue
            revisions = set(revisions)

            # FIND THE REVISIONS ON THIS BRANCH
            for r in list(revisions):
                with Explanation(
                    "Scanning {{branch}} {{revision|left(12)}}",
                    branch=branch.name,
                    revision=r,
                    debug=DAEMON_DEBUG,
                ):
                    rev = self.get_revision(Revision(branch=branch, changeset={"id": r}))
                    if DAEMON_DEBUG:
                        Log.note("found revision with push date {{date|datetime}}", date=rev.push.date)
                    revisions.discard(r)

            # FIND ANY BRANCH THAT MAY HAVE THIS REVISION
            for r in list(revisions):
                self._find_revision(r)
def _normalize_revision(self, r, found_revision, push, get_diff):
    """
    Convert a raw hg changeset record `r` into a Revision, save it to ES,
    and return it.

    :param r: raw changeset data from hg (mo-dots Data; mutated in place)
    :param found_revision: Revision carrying the branch this changeset was found on
    :param push: push info to attach to the Revision
    :param get_diff: when truthy (or GET_DIFF is set), also fetch the json diff
    :return: the normalized Revision
    """
    known_names = {
        "rev", "node", "user", "description", "desc", "date", "files",
        "backedoutby", "parents", "children", "branch", "tags", "pushuser",
        "pushdate", "pushid", "phase", "bookmarks",
    }
    new_names = set(r.keys()) - known_names
    if new_names and not r.tags:
        # hg may grow new properties; surface them so this list can be updated
        Log.warning("hg is returning new property names ({{names}})", names=new_names)

    rev = Revision(
        branch=found_revision.branch,
        index=r.rev,
        changeset=Changeset(
            id=r.node,
            id12=r.node[0:12],
            author=r.user,
            description=strings.limit(coalesce(r.description, r.desc), 2000),
            date=parse_hg_date(r.date),
            files=r.files,
            backedoutby=r.backedoutby if r.backedoutby else None,
            bug=self._extract_bug_id(r.description),
        ),
        parents=unwraplist(list(set(r.parents))),
        children=unwraplist(list(set(r.children))),
        push=push,
        phase=r.phase,
        bookmarks=unwraplist(r.bookmarks),
        etl={"timestamp": Date.now().unix, "machine": machine_metadata},
    )

    # blank out everything we consumed, then copy whatever remains onto rev
    for consumed in (
        "pushuser", "pushdate", "pushid", "node", "user", "desc",
        "description", "date", "files", "backedoutby", "parents",
        "children", "bookmarks",
    ):
        setattr(r, consumed, None)
    set_default(rev, r)

    # ADD THE DIFF
    if get_diff or GET_DIFF:
        rev.changeset.diff = self._get_json_diff_from_hg(rev)

    try:
        _id = (
            coalesce(rev.changeset.id12, "")
            + "-" + rev.branch.name
            + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
        )
        with self.es_locker:
            self.es.add({"id": _id, "value": rev})
    except Exception as e:
        # best-effort persistence; the caller still gets the Revision
        Log.warning("did not save to ES", cause=e)

    return rev
def _daemon(self, please_stop):
    """
    Background worker: pop (branch, revisions, after) work items off
    self.todo, scan each revision so it is cached, and re-pull from hg when
    the cached record's etl timestamp is older than `after`.

    :param please_stop: stop signal (mo-threads Signal); checked between items
    """
    while not please_stop:
        with Explanation("looking for work"):
            try:
                branch, revisions, after = self.todo.pop(till=please_stop)
            except Exception:
                if please_stop:
                    # queue pop was interrupted by shutdown; exit quietly
                    break
                else:
                    raise  # bare raise preserves the original traceback (raise e would truncate it)
            if branch.name in DAEMON_DO_NO_SCAN:
                continue
            revisions = set(revisions)

            # FIND THE REVISIONS ON THIS BRANCH
            for r in list(revisions):
                try:
                    rev = self.get_revision(
                        Revision(branch=branch, changeset={"id": r}),
                        None,  # local
                        False,  # get_diff
                        True,  # get_moves
                    )
                    if after and after > rev.etl.timestamp:
                        # cached record predates `after`; pull a fresh copy from hg
                        rev = self._get_from_hg(revision=rev)
                    if DAEMON_DEBUG:
                        Log.note(
                            "found revision with push date {{date|datetime}}",
                            date=rev.push.date,
                        )
                    revisions.discard(r)

                    if rev.etl.timestamp > Date.now() - (
                        DAEMON_RECENT_HG_PULL * SECOND
                    ):
                        # SOME PUSHES ARE BIG, RUNNING THE RISK OTHER MACHINES ARE
                        # ALSO INTERESTED AND PERFORMING THE SAME SCAN. THIS DELAY
                        # WILL HAVE SMALL EFFECT ON THE MAJORITY OF SMALL PUSHES
                        # https://bugzilla.mozilla.org/show_bug.cgi?id=1417720
                        Till(seconds=Random.float(DAEMON_HG_INTERVAL * 2)).wait()
                except Exception as e:
                    # wrap so the string containment test below works; a raw
                    # Exception does not support `in` (the other handler in this
                    # file wraps the same way before testing)
                    e = Except.wrap(e)
                    Log.warning(
                        "Scanning {{branch}} {{revision|left(12)}}",
                        branch=branch.name,
                        revision=r,
                        cause=e,
                    )
                    if "Read timed out" in e:
                        Till(seconds=DAEMON_WAIT_AFTER_TIMEOUT).wait()

            # FIND ANY BRANCH THAT MAY HAVE THIS REVISION
            for r in list(revisions):
                self._find_revision(r)
def _find(please_stop):
    # Worker: walk the branch queue, probing each branch for `revision`.
    # Found revisions are appended to `output` (under `locker`); any
    # failures are collected in `problems` instead of aborting the scan.
    for candidate in queue:
        if please_stop:
            return
        try:
            url = candidate.url + "json-info?node=" + revision
            found = self.get_revision(Revision(branch=candidate, changeset={"id": revision}))
            with locker:
                output.append(found)
            Log.note("Revision found at {{url}}", url=url)
        except Exception as cause:
            problems.append(cause)
def _normalize_revision(self, r, found_revision, push, get_diff, get_moves):
    """
    Convert a raw hg changeset record into a Revision, persist it to ES
    (repo index, and moves index when requested), and return it.

    :param r: raw changeset data from hg (mo-dots Data; mutated in place below)
    :param found_revision: Revision carrying the branch this changeset belongs to
    :param push: push info to attach to the Revision
    :param get_diff: when truthy, also fetch and attach the json diff
    :param get_moves: when truthy, also fetch and attach file-move info
    :return: the normalized Revision
    """
    # WARN WHEN hg RETURNS PROPERTY NAMES NOT IN KNOWN_TAGS
    new_names = set(r.keys()) - KNOWN_TAGS
    if new_names and not r.tags:
        Log.warning(
            "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}",
            names=new_names,
            changeset=r.node,
            url=found_revision.branch.url,
        )

    changeset = Changeset(
        id=r.node,
        id12=r.node[0:12],
        # NOTE(review): prefers r.author, falling back to r.user
        author=coalesce(r.author, r.user),
        description=strings.limit(coalesce(r.description, r.desc), 2000),
        date=parse_hg_date(r.date),
        files=r.files,
        backedoutby=r.backedoutby,
        backsoutnodes=r.backsoutnodes,
        # union of bug numbers reported by hg and any bug id parsed from the description
        bug=mo_math.UNION(([int(b) for b in r.bugs.no], self._extract_bug_id(r.description))),
    )
    rev = Revision(
        branch=found_revision.branch,
        index=r.rev,
        changeset=changeset,
        parents=set(r.parents),
        children=set(r.children),
        push=push,
        phase=r.phase,
        bookmarks=unwraplist(r.bookmarks),
        landingsystem=r.landingsystem,
        etl={
            "timestamp": Date.now().unix,
            "machine": machine_metadata
        },
    )
    rev = elasticsearch.scrub(rev)
    # blank out every property we consumed (or deliberately drop) so that
    # set_default() below copies only the leftovers onto rev
    r.pushuser = None
    r.pushdate = None
    r.pushid = None
    r.node = None
    r.user = None
    r.desc = None
    r.description = None
    r.date = None
    r.files = None
    r.backedoutby = None
    r.parents = None
    r.children = None
    r.bookmarks = None
    r.landingsystem = None
    r.extra = None
    r.author = None
    r.pushhead = None
    r.reviewers = None
    r.bugs = None
    r.treeherderrepourl = None
    r.backsoutnodes = None
    r.treeherderrepo = None
    r.perfherderurl = None
    r.branch = None
    r.phase = None
    r.rev = None
    r.tags = None
    set_default(rev, r)

    # ADD THE DIFF
    if get_diff:
        rev.changeset.diff = self._get_json_diff_from_hg(rev)

    try:
        # document id: <changeset id12>-<branch>-<locale>
        _id = (coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE))
        with self.repo_locker:
            self.repo.add({"id": _id, "value": rev})
        if get_moves:
            rev.changeset.moves = self._get_moves_from_hg(rev)
            with self.moves_locker:
                self.moves.add({"id": _id, "value": rev})
    except Exception as e:
        # best-effort persistence: log, back off, and still return the Revision
        e = Except.wrap(e)
        Log.warning(
            "Did not save to ES, waiting {{duration}} seconds",
            duration=WAIT_AFTER_NODE_FAILURE,
            cause=e,
        )
        Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
        if "FORBIDDEN/12/index read-only" in e:
            pass  # KNOWN FAILURE MODE
    return rev