def parse_build(self, url, body):
    """Scrape one Jenkins build page into a Build object.

    `url` is the build page URL; `body` is its raw HTML. If the fetch
    layer handed us an error payload (body starting with 'ERROR'), a
    stub Build is returned with the body stored in the host slot and
    status 'Unknown'. Otherwise name/status/number/start/duration/host
    are token-extracted from the HTML, and upstream/downstream build
    URLs plus a trigger tag are derived from the page text.
    """
    if body.startswith('ERROR'):
        # No page to parse — build a placeholder from the URL parts alone.
        parts = split_jenkins_url(url)
        return Build(url, body, parts[3], parts[4], None, None,
                     'Unknown', [], self.server_url, '', [])

    name = extract_token(body, '<title>', ' ')
    header = extract_token(body, '<h1>', '</h1>')
    status = extract_token(header, 'alt="', '"')
    build_number = extract_token(header, 'Build #', '\n')
    start = extract_token(header, "(", ")")
    duration = extract_token(body, '/buildTimeTrend">', '</a')
    host = extract_token(body, '<a href="/jenkins/computer/', '"')
    downstream_html = extract_token(body, 'h2>Downstream Builds</h2', '</ul>')

    # Build links end in a numeric id plus trailing slash.
    link_pattern = r'href="([^"]+[0-9]/)"'

    trigger = "Unknown"
    upstream_urls = []
    if 'Started by upstream project' in body:
        upstream_html = extract_token(body, 'Started by upstream project', '</td')
        upstream_urls = re.findall(link_pattern, upstream_html)
        # Upstream trigger is conveyed via upstream_urls, not the tag.
        trigger = None
    if 'Started by GitHub push' in body:
        try:
            trigger = "commit_" + extract_token(body, 'commit: ', '<')
        except TypeError:
            # extract_token found no commit hash (returned None).
            trigger = "commit_unknown"
    if 'Started by user' in body:
        trigger = "started_by_user"

    downstream_urls = re.findall(link_pattern, downstream_html) if downstream_html else []

    return Build(url, host, name, build_number, start, duration, status,
                 downstream_urls, self.server_url, trigger, upstream_urls)
def crawl(self, url):
    """Crawl the build graph forward (downstream) starting from `url`.

    Splits the URL into server/root/base components, walks downstream
    via `_crawl`, prunes the resulting tree with `clean`, attaches the
    crawl statistics to the root, and returns the root in a one-item
    list (matching the multi-root shape of `reverse_crawl`).
    """
    self.url = url
    # split_jenkins_url -> (server_url, path_root, base_path, ...)
    self.server_url, self.path_root, self.base_path = split_jenkins_url(url)[:3]
    self._crawl(self.path_root)
    self.clean(self.root)
    self.root.extra = self.stats()
    return [self.root]
def reverse_crawl(self, url):
    """Crawl the build graph backward (upstream) starting from `url`.

    Splits the URL into server/leaf/base components, walks upstream via
    `_reverse_crawl` which populates `self.roots`, attaches the crawl
    statistics to the first discovered root, and returns all roots.

    Returns an empty list if no upstream roots were discovered.
    """
    self.url = url
    ret = split_jenkins_url(url)
    self.server_url = ret[0]
    self.path_leaf = ret[1]
    self.base_path = ret[2]
    self.roots = []
    self._reverse_crawl(self.path_leaf)
    # TODO: disable cache and crawl from roots
    # for root in self.roots:
    #     self._crawl(root.url)
    # Guard: _reverse_crawl may find nothing (bad URL / error page);
    # the original unconditionally indexed roots[0] and raised IndexError.
    if self.roots:
        self.roots[0].extra = self.stats()
    return self.roots