def check(self, page):
    """Validate the HTTP status carried by *page*.

    Returns a ``(ok, message)`` pair: ``(True, None)`` when
    ``self.is_status_expected`` accepts ``page.status``; otherwise a warning
    is logged and ``(False, "Unexpected status code <status>")`` is returned.
    """
    status = page.status
    if not self.is_status_expected(status):
        GLog.warning("Status code[%s] not expected for url: %s",
                     status, page.url)
        return False, "Unexpected status code %s" % status
    return True, None
def check_bak(self, url, port=None):
    """Check the status code of *url*, trying the path with and without a leading "/".

    Returns 0 as soon as one attempt yields a status accepted by
    ``self.is_status_expected``; returns 1 (after logging a warning with the
    last status seen) when both attempts fail.
    """
    # Split the url into protocol and address; without a ":" assume http.
    # NOTE(review): only the first two ":"-separated pieces are used, so
    # anything after a second ":" (e.g. an embedded port and the path that
    # follows it) is dropped -- confirm callers never pass such urls.
    pieces = url.split(":")
    if len(pieces) > 1:
        protocol = pieces[0]
        addr = pieces[1].strip("/")
    else:
        protocol, addr = "http", url

    # Split the address into host and path (path is "" when there is no "/").
    host, _, path = addr.partition("/")

    # Some urls are only reachable when the path starts with "/", others only
    # when it does not, so both variants are tried in turn. The check is
    # marked as failed only if every variant fails.
    for candidate in (path, "/" + path):
        status = get_status_code(protocol, host, port, candidate)
        if status is not None and self.is_status_expected(status):
            return 0

    GLog.warning("Status code[%s] not expected for url: %s", status, url)
    return 1
def check(self, url):
    """Fetch the status code for *url* and compare it with the expected ones.

    Returns 0 when ``self.is_status_expected`` accepts the status, otherwise
    logs a warning and returns 1.
    """
    status = utils.get_status_code(url)
    if self.is_status_expected(status):
        return 0

    GLog.warning("Status code[%s] not expected for url: %s", status, url)
    return 1
def read_list_from_file(file_name):
    """Return the lines of *file_name* with surrounding whitespace stripped.

    When *file_name* is not an existing regular file, a warning is logged and
    an empty list is returned instead.
    """
    if not os.path.isfile(file_name):
        GLog.warning("file[%s] not existed!", file_name)
        return []

    entries = []
    with open(file_name, "r") as handle:
        for raw_line in handle:
            entries.append(raw_line.strip())
    return entries
def check(self, url):
    """Check the links of *url* against the configured whitelist and/or blacklist.

    Returns ``(0, '')`` when every enabled list check returns 0, otherwise
    ``(1, self.err_msg)``. When neither list is configured a fatal message is
    logged and a bare ``0`` is returned.
    """
    if self.whitelist is None and self.blacklist is None:
        GLog.fatal("You should input whitelist or blacklist at least")
        # NOTE(review): this path returns an int while the others return a
        # tuple -- confirm callers cope with both shapes.
        return 0

    # A list that is not configured contributes no failure.
    white_failed = (
        self.check_by_whitelist(url) if self.whitelist is not None else 0
    )
    black_failed = (
        self.check_by_blacklist(url) if self.blacklist is not None else 0
    )

    if white_failed + black_failed != 0:
        return (1, self.err_msg)
    return (0, '')
def check(self, url):
    """Report unmatched HTML tags in the content read from *url*.

    The content is fed to an ``HTMLTagParser``; every tag returned by its
    ``close()`` is logged as a warning. Returns 1 when at least one such tag
    was returned, 0 otherwise.
    """
    content = utils.read_content(url)
    parser = HTMLTagParser()
    parser.feed(content)
    unmatch_tags = parser.close()

    for tag in unmatch_tags:
        kind = "start" if tag.start else "end"
        GLog.warning(
            "Broken %s tag[%s] in pos[%s,%s] in url: %s",
            kind,
            tag.name,
            tag.pos[0],
            tag.pos[1],
            url,
        )

    return 1 if len(unmatch_tags) != 0 else 0
def check_by_blacklist(self, url):
    """Flag links of *url* whose host ends with a blacklisted domain.

    Each offending link is appended to ``self.err_msg`` and logged as a
    warning. Returns 1 when at least one link matched, 0 otherwise.
    """
    found_blacklisted = False
    for link in self.get_url_list(url):
        # Extract the host from the link.
        host = self.get_host_from_url(link)
        # NOTE(review): plain suffix match, so an entry "bad.com" also
        # matches a host such as "notbad.com" -- confirm this is intended.
        if not any(host.endswith(domain) for domain in self.blacklist):
            continue
        found_blacklisted = True
        self.err_msg += 'Domain in blacklist [%s]\n' % link
        GLog.warning("Domain in blacklist [%s], parent url:%s", link, url)

    return 1 if found_blacklisted else 0
def check_by_whitelist(self, url):
    """Flag links of *url* whose host does not end with any whitelisted domain.

    Each offending link is appended to ``self.err_msg`` and logged as a
    warning. Returns 1 when at least one link failed, 0 otherwise.
    """
    rejected = 0
    for link in self.get_url_list(url):
        # Extract the host from the link.
        host = self.get_host_from_url(link)
        # NOTE(review): plain suffix match, so an entry "good.com" also
        # accepts a host such as "evilgood.com" -- confirm this is intended.
        allowed = any(host.endswith(domain) for domain in self.whitelist)
        if allowed:
            continue
        rejected += 1
        self.err_msg += 'Domain not in whitelist [%s]\n' % link
        GLog.warning("Domain not in whitelist [%s], parent url: %s", link, url)

    # No rejected link means the check passed.
    return 0 if rejected == 0 else 1
def check_url(self, url):
    """Run every configured checker against *url*.

    *url* may be a ``URL`` object (its ``url`` and ``parent`` attributes are
    used) or any other value, which is passed to the checkers as-is with no
    parent. Returns 0 when the summed checker results are 0, or when the url
    is accepted by ``self.is_white_url`` (failure logged as a warning only);
    returns 1 after logging a fatal message otherwise.
    """
    if isinstance(url, URL):
        parent, url = url.parent, url.url
    else:
        parent = None

    GLog.debug("Checking url: %s", url)
    failed_total = sum(checker.check(url) for checker in self.checkers)

    # Every checker returned 0 (ok): nothing more to do.
    if failed_total == 0:
        return 0

    # At least one checker failed; white urls only produce a warning.
    if self.is_white_url(url):
        GLog.warning("URL check failed: %s, parent url: %s", url, parent)
        return 0

    GLog.fatal("URL check failed: %s, parent url: %s", url, parent)
    return 1
def check(self, page):
    """Run every checker on *page* and record the failures.

    Each checker returns a ``(ok, message)`` pair; for every falsy ``ok`` a
    ``Result(page, message)`` is appended to ``self.result``.
    """
    GLog.debug("Checking url: %s", page.url)
    for checker in self.checkers:
        passed, message = checker.check(page)
        if passed:
            continue
        self.result.append(Result(page, message))
def run_cmd(cmd):
    """Run *cmd* and return ``(status, output)``.

    *cmd* is handed to ``getstatusoutput``, which runs it through the shell,
    so it must not be built from untrusted input. A non-zero status is logged
    as a warning; the pair is returned in every case.

    The ``commands`` module no longer exists on Python 3, so the equivalent
    helper from ``subprocess`` is preferred and ``commands`` is used only as
    the Python 2 fallback. Only the zero / non-zero distinction is relied on
    here; the exact non-zero value may differ between the two helpers.
    """
    try:
        from subprocess import getstatusoutput
    except ImportError:
        # Python 2: subprocess has no getstatusoutput, use the legacy module.
        from commands import getstatusoutput

    ret, output = getstatusoutput(cmd)
    if ret != 0:
        GLog.warning("Run cmd[%s] failed! Error msg: %s", cmd, output)
    return ret, output