def init_wato(self):
    if not self._missing_but_required_wato_files():
        logger.info("WATO is already initialized -> Skipping initialization")
        return

    logger.debug("Initializing WATO...")

    web = CMKWebSession(self)
    web.login()
    web.set_language("en")

    # Call WATO once for creating the default WATO configuration
    logger.debug("Requesting wato.py (which creates the WATO factory settings)...")
    response = web.get("wato.py?mode=sites").text
    #logger.debug("Debug: %r" % response)
    assert "<title>Distributed Monitoring</title>" in response
    assert "replication_status_%s" % web.site.id in response, \
        "WATO does not seem to be initialized: %r" % response

    logger.debug("Waiting for WATO files to be created...")
    wait_time = 20.0
    while self._missing_but_required_wato_files() and wait_time >= 0:
        time.sleep(0.5)
        wait_time -= 0.5

    missing_files = self._missing_but_required_wato_files()
    assert not missing_files, \
        "Failed to initialize WATO data structures " \
        "(Still missing: %s)" % missing_files

    self._add_wato_test_config(web)
def init_wato(self):
    if not self._missing_but_required_wato_files():
        print("WATO is already initialized -> Skipping initialization")
        return

    web = CMKWebSession(self)
    web.login()
    web.set_language("en")

    # Call WATO once for creating the default WATO configuration
    response = web.get("wato.py").text
    assert "<title>WATO" in response
    assert "<div class=\"title\">Manual Checks</div>" in response, \
        "WATO does not seem to be initialized: %r" % response

    wait_time = 20
    while self._missing_but_required_wato_files() and wait_time >= 0:
        time.sleep(0.5)
        wait_time -= 0.5

    missing_files = self._missing_but_required_wato_files()
    assert not missing_files, \
        "Failed to initialize WATO data structures " \
        "(Still missing: %s)" % missing_files

    self._add_wato_test_config(web)
def init_wato(self):
    if not self._missing_but_required_wato_files():
        logger.info("WATO is already initialized -> Skipping initialization")
        return

    logger.debug("Initializing WATO...")

    web = CMKWebSession(self)
    web.login()

    # Call WATO once for creating the default WATO configuration
    logger.debug("Requesting wato.py (which creates the WATO factory settings)...")
    response = web.get("wato.py?mode=sites").text
    #logger.debug("Debug: %r" % response)
    assert "site=%s" % web.site.id in response

    logger.debug("Waiting for WATO files to be created...")
    wait_time = 20.0
    while self._missing_but_required_wato_files() and wait_time >= 0:
        time.sleep(0.5)
        wait_time -= 0.5

    missing_files = self._missing_but_required_wato_files()
    assert not missing_files, \
        "Failed to initialize WATO data structures " \
        "(Still missing: %s)" % missing_files

    web.enforce_non_localized_gui()
    self._add_wato_test_config(web)
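# The three init_wato() variants above all share the same poll-until-ready
# pattern: re-check _missing_but_required_wato_files() every 0.5 seconds until
# the files appear or a 20 second budget is exhausted. The helper below is a
# minimal, self-contained sketch of that pattern; wait_until() and the usage
# shown in the trailing comment are illustrative and not part of this library.
import time


def wait_until(predicate, timeout=20.0, interval=0.5):
    """Poll predicate() until it returns True or the timeout expires."""
    remaining = timeout
    while remaining >= 0:
        if predicate():
            return True
        time.sleep(interval)
        remaining -= interval
    return False


# Hypothetical usage, assuming `site` exposes the helper used above:
#   assert wait_until(lambda: not site._missing_but_required_wato_files()), \
#       "Failed to initialize WATO data structures"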
class Worker(threading.Thread):
    def __init__(self, num, crawler):
        super(Worker, self).__init__()
        self.name = "worker-%d" % num
        self.crawler = crawler
        self.daemon = True
        self.terminate = False
        self.idle = True

        self.client = CMKWebSession(self.crawler.site)
        self.client.login()
        self.client.set_language("en")

    def run(self):
        while not self.terminate:
            try:
                while not self.terminate:
                    url = self.crawler.todo.get(block=False)
                    self.idle = False
                    try:
                        self.visit_url(url)
                    except Exception as e:
                        self.error(url, "Failed to visit: %s\n%s" %
                                   (e, traceback.format_exc()))
                    self.crawler.todo.task_done()
            except queue.Empty:
                self.idle = True
                time.sleep(0.5)

    def stop(self):
        self.terminate = True

    def visit_url(self, url):
        if url.url in self.crawler.visited:
            logger.info("Already visited: %s", url.url)
            return
        self.crawler.visited.append(url.url)

        #print("%s - Visiting #%d (todo %d): %s" %
        #      (self.name, len(self.crawler.visited), self.crawler.todo.qsize(), url.url))

        started = time.time()
        try:
            #print "FETCH", url.url_without_host()
            response = self.client.get(url.url_without_host())
        except AssertionError as e:
            if "This view can only be used in mobile mode" in "%s" % e:
                logger.info("Skipping mobile mode view checking")
                return
            raise
        duration = time.time() - started

        self.update_stats(url, duration, len(response.content))

        content_type = response.headers.get('content-type')
        #print self.name, content_type, len(response.text)

        if content_type.startswith("text/html"):
            self.check_response(url, response)
        elif content_type.startswith("text/plain"):
            pass  # no specific test
        elif content_type.startswith("text/csv"):
            pass  # no specific test
        elif content_type in ["image/png", "image/gif"]:
            pass  # no specific test
        elif content_type in ["application/pdf"]:
            pass  # no specific test
        elif content_type in [
                "application/x-rpm",
                "application/x-deb",
                "application/x-debian-package",
                "application/x-gzip",
                "application/x-msdos-program",
                "application/x-msi",
                "application/x-tgz",
                "application/x-redhat-package-manager",
                "application/x-pkg",
                "application/x-tar",
                "application/json",
                "text/x-chdr",
                "text/x-c++src",
                "text/x-sh",
        ]:
            pass  # no specific test
        else:
            self.error(url, "Unknown content type: %s" % (content_type))
            return

    def update_stats(self, url, duration, content_size):
        stats = self.crawler.stats.setdefault(url.neutral_url(), {
            "first_duration": duration,
            "first_content_size": content_size,
        })

        avg_duration = (duration + stats.get("avg_duration", duration)) / 2.0
        avg_content_size = (content_size + stats.get("avg_content_size", content_size)) / 2.0

        stats.update({
            "orig_url": url.orig_url,
            "referer_url": url.referer_url,
            "num_visited": stats.get("num_visited", 0) + 1,
            "last_duration": duration,
            "last_content_size": content_size,
            "avg_duration": avg_duration,
            "avg_content_size": avg_content_size,
        })

    def error(self, url, s):
        s = "[%s - found on %s] %s" % (url.url, url.referer_url, s)
        self.crawler.error(s)

    def check_response(self, url, response):
        soup = BeautifulSoup(response.text, "lxml")

        # The referenced resources (images, stylesheets, javascript files) are checked
        # by the generic web client handler. This only needs to realize the crawling.

        self.check_content(url, response, soup)
        self.check_links(url, soup)
        self.check_frames(url, soup)
        self.check_iframes(url, soup)

    def check_content(self, url, response, soup):
        ignore_texts = [
            "This view can only be used in mobile mode.",
            # Some single context views are accessed without their context information,
            # which results in a helpful error message since 1.7. These are not failures
            # that this test should report.
            "Missing context information",
            # Same for dashlets that are related to a specific context
            "There are no metrics meeting your context filters",
        ]

        for element in soup.select("div.error"):
            inner_html = "%s" % element

            skip = False
            for ignore_text in ignore_texts:
                if ignore_text in inner_html:
                    skip = True
                    break

            if not skip:
                self.error(url, "Found error: %s" % (element))

    def check_frames(self, url, soup):
        self.check_referenced(url, soup, "frame", "src")

    def check_iframes(self, url, soup):
        self.check_referenced(url, soup, "iframe", "src")

    def check_links(self, url, soup):
        self.check_referenced(url, soup, "a", "href")

    def check_referenced(self, referer_url, soup, tag, attr):
        elements = soup.find_all(tag)

        for element in elements:
            orig_url = element.get(attr)
            if orig_url is None:
                continue  # Skip elements that don't have the attribute in question

            url = self.normalize_url(self.crawler.site.internal_url, orig_url)
            if url is None:
                continue

            try:
                self.verify_is_valid_url(url)
            except InvalidUrl:
                #print self.name, "skip invalid", url, e
                self.crawler.skipped.add(url)
                continue

            # Ensure that this url has not been crawled yet
            crawl_it = False
            with self.crawler.handled_lock:
                if url not in self.crawler.handled:
                    crawl_it = True
                    self.crawler.handled.add(url)

            if crawl_it:
                #open("/tmp/todo", "a").write("%s (%s)\n" % (url, referer_url.url))
                self.crawler.todo.put(
                    Url(url, orig_url=orig_url, referer_url=referer_url.url))

    def verify_is_valid_url(self, url):
        parsed = urlsplit(url)

        if parsed.scheme != "http":
            raise InvalidUrl("invalid scheme: %r" % (parsed,))

        # skip external urls
        if url.startswith("http://") and not url.startswith(
                self.crawler.site.internal_url):
            raise InvalidUrl("Skipping external URL: %s" % url)

        # skip non check_mk urls
        if not parsed.path.startswith("/%s/check_mk" % self.crawler.site.id) \
           or "../pnp4nagios/" in parsed.path \
           or "../nagvis/" in parsed.path \
           or "check_mk/plugin-api" in parsed.path \
           or "../nagios/" in parsed.path:
            raise InvalidUrl("Skipping non Check_MK URL: %s %s" % (url, parsed))

        # skip current url with link to index
        if "index.py?start_url=" in url:
            raise InvalidUrl("Skipping link to index with current URL: %s" % url)

        if "logout.py" in url:
            raise InvalidUrl("Skipping logout URL: %s" % url)

        if "_transid=" in url:
            raise InvalidUrl("Skipping action URL: %s" % url)

        if "selection=" in url:
            raise InvalidUrl("Skipping selection URL: %s" % url)

        # TODO: Remove this exclude when ModeCheckManPage works without an
        # automation call. Currently we have to use such a call to enrich the
        # man page with some additional info from config.check_info, see
        # AutomationGetCheckManPage.
        if "mode=check_manpage" in url and "wato.py" in url:
            raise InvalidUrl("Skipping man page URL: %s" % url)

        # Don't follow filled in filter form views
        if "view.py" in url and "filled_in=filter" in url:
            raise InvalidUrl("Skipping filled in filter URL: %s" % url)

        # Don't follow the view editor
        if "edit_view.py" in url:
            raise InvalidUrl("Skipping view editor URL: %s" % url)

        # Skip agent download files
        if parsed.path.startswith("/%s/check_mk/agents/" % self.crawler.site.id):
            raise InvalidUrl("Skipping agent download file: %s" % url)

    def normalize_url(self, base_url, url):
        url = urljoin(base_url, url.rstrip("#"))
        parsed = list(urlsplit(url))
        parsed[3] = urlencode(sorted(parse_qsl(parsed[3], keep_blank_values=True)))
        return urlunsplit(parsed)
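# normalize_url() above relies only on urllib.parse primitives: it resolves the
# link against the base URL and re-encodes the query string with its parameters
# sorted, so that links differing only in parameter order map to the same entry
# in crawler.handled / crawler.visited. The standalone sketch below shows the
# same idea; the function name and the example URLs are illustrative only.
from urllib.parse import urljoin, urlsplit, urlunsplit, urlencode, parse_qsl


def normalize(base_url, url):
    absolute = urljoin(base_url, url.rstrip("#"))
    parts = list(urlsplit(absolute))
    # parts[3] is the query string; sorting its key/value pairs gives a stable form
    parts[3] = urlencode(sorted(parse_qsl(parts[3], keep_blank_values=True)))
    return urlunsplit(parts)


# Both of these normalize to "http://host/site/check_mk/view.py?a=1&b=2":
#   normalize("http://host/site/check_mk/", "view.py?b=2&a=1")
#   normalize("http://host/site/check_mk/", "view.py?a=1&b=2")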