def get_404_page(self):
    """Probe two bogus URLs and record how the target answers missing pages.

    Both probe URLs are built from ``self.domain`` and requested through
    ``my_request``, which is unpacked here as ``(response, code, error)``.
    The result is stored in ``self._404_page``:

    * ``{"code": 404, "page": None}`` when both probes return 404;
    * ``{"code": 200, "page": <body of first probe>}`` when both probes
      return 200 and ``self.page_ratio`` of the two bodies exceeds 0.7
      (presumably a similarity score -- confirm against ``page_ratio``);
    * ``None`` in every other case, including when the probes still fail
      after the allowed attempts, so no stale baseline is left behind.

    Returns:
        None. The outcome is communicated via ``self._404_page``.
    """
    url1 = self.domain + "/s4dsad"
    url2 = self.domain + "/asdas/1.txt"

    # Two attempts in total, matching the original "give up when the
    # retry counter reaches 3" behaviour.
    for attempt in (1, 2):
        res1, code1, error1 = my_request(url1)
        res2, code2, error2 = my_request(url2)
        if not (error1 or error2):
            break
        # Report whichever probe actually failed; logging only error1
        # produced "None: retry N" when just the second probe errored.
        logging.warning("%s: retry %d", error1 or error2, attempt)
    else:
        # Every attempt failed: make the absence of a baseline explicit.
        self._404_page = None
        return

    baseline = None
    if code1 == 404 and code2 == 404:
        baseline = {
            "code": code1,
            "page": None,
        }
    elif code1 == 200 and code2 == 200:
        page1 = res1.read()
        page2 = res2.read()
        # Two unrelated bogus paths returning near-identical 200 bodies
        # indicates a custom "soft 404" page.
        if self.page_ratio(page1, page2) > 0.7:
            baseline = {
                "code": 200,
                "page": page1,
            }

    # Single assignment so the attribute never holds an intermediate value.
    self._404_page = baseline
def check(self):
    """Drain ``self.Q`` and record every fingerprint the target matches.

    Each queue item is a JSON document with a ``name``, a ``request``
    section (``url``, ``method``, ``headers``, ``data``) and a ``response``
    section (``code``, ``html``, ``headers``, ``md5``).  The request is sent
    to ``self.domain`` + ``url`` via ``my_request``; every non-empty
    expectation in ``response`` must hold for the fingerprint to match.
    Matching names are appended to ``self.data`` and logged.

    Fingerprints whose name is already in ``self.data`` are skipped without
    issuing a request.  Requests that come back with an error are logged
    and skipped.
    """
    while not self.Q.empty():
        finger = json.loads(self.Q.get())
        name = finger["name"]
        if name in self.data:
            # Already identified; avoid a redundant request.
            continue

        request = finger["request"]
        expected = finger["response"]
        url = self.domain + request["url"]
        headers = request["headers"]
        want_code = expected["code"]
        want_html = expected["html"].encode("utf-8")
        want_headers = expected["headers"]
        want_md5 = expected["md5"]

        # print url
        if request["method"] == "POST":
            data = urllib.urlencode(request["data"])
            res, code, error = my_request(url=url, data=data, headers=headers)
        else:
            res, code, error = my_request(url=url, headers=headers)

        if error:
            logging.warning(error)
            continue

        # A missing response object is treated as an empty body *and* empty
        # headers; previously only the body was guarded and the header
        # check dereferenced ``res.headers`` on a falsy ``res``.
        if res:
            buf = res.read()
            header_text = str(res.headers)
        else:
            buf = ''
            header_text = ''

        # An empty/falsy expectation means "do not check this aspect".
        matched = (
            (not want_code or want_code == code)
            and (not want_html or want_html in buf)
            and (not want_headers or want_headers in header_text)
            and (not want_md5 or want_md5 == self.get_md5(buf))
        )

        if matched:
            logging.info("Find finger %s" % name)
            self.data.append(name)
def check_network(self, domain):
    """Report whether ``http://<domain>`` can be requested without error.

    The request goes through ``my_request``; only the error element of its
    result is inspected.  On failure the error text is printed.

    Returns:
        True when the request reported no error, False otherwise.
    """
    _, _, failure = my_request("http://" + domain)
    if failure:
        print(str(failure))
        return False
    return True
def get_title(self, domain):
    """Fetch ``http://<domain>`` and return a short label for the page.

    The label is, in order of preference:

    * the text of the first ``<title>...</title>`` element in the body,
      or the literal string ``"None"`` when the body cannot be read or
      contains no title;
    * ``str(error)`` when no response was obtained and an error was
      reported;
    * ``str(code)`` otherwise.

    The label is also printed with a ``[get_title]: `` prefix.

    Returns:
        The label as a byte string (this file targets Python 2).
    """
    url = "http://" + domain
    res, code, error = my_request(url)
    if res:
        title = "None"
        try:
            body = res.read()
        except Exception:
            # Best-effort: a failed read is reported as "no title" rather
            # than aborting, as before -- but without a bare ``except`` that
            # would also swallow KeyboardInterrupt/SystemExit.
            body = None
        if body is not None:
            # Non-greedy so that a later ``</title>`` on the same line
            # (e.g. inline SVG) is not swallowed; case-insensitive so
            # ``<TITLE>`` is recognised too.
            match = re.search(r'<title>(.*?)</title>', body, re.IGNORECASE)
            if match:
                title = match.group(1)
                # Only text (unicode) needs encoding.  Calling .encode() on
                # a Python 2 byte string triggers an implicit ASCII decode,
                # which made every non-ASCII title fall back to "None".
                if not isinstance(title, str):
                    title = title.encode("utf8")
    elif error:
        title = str(error)
    else:
        title = str(code)
    print("[get_title]: " + title)
    return title
def cmp_page(self, url):
    """Decide whether ``url`` is a real page rather than the site's 404 page.

    The decision is made against the baseline in ``self._404_page``:

    * no baseline (``None``/empty): returns False without issuing a
      request, instead of failing with ``TypeError`` on the subscript;
    * baseline carries a page body: ``url`` must answer 200 and
      ``self.page_ratio`` between its body and the baseline body must be
      below ``CONFIG.DIR_RATIO``;
    * baseline code is 404: ``url`` must answer 200;
    * anything else: False.

    Returns:
        True when ``url`` is judged to exist, False otherwise.
    """
    baseline = self._404_page
    if not baseline:
        # No baseline was established, so existence cannot be judged.
        return False

    res, code, error = my_request(url=url)
    if code != 200:
        # Every positive branch requires a 200 answer.
        return False

    if baseline["page"]:
        # Soft-404 site: the page must differ enough from the baseline body.
        return self.page_ratio(res.read(), baseline["page"]) < CONFIG.DIR_RATIO
    return baseline["code"] == 404