def work(self, site):
    """Collect basic information about ``site`` and record it.

    Requests ``site``, builds a dict describing the response (title,
    status, headers, server banner, body length, favicon, ...) and appends
    it to ``self.site_info_list``. When the response is a 301/302 whose
    ``Location`` resolves to a different URL that starts with ``site``,
    the redirect target is processed the same way.
    """
    _, hostname, _ = get_host(site)
    response = utils.http_req(site)
    body = response.content

    info = {
        "site": site,
        "hostname": hostname,
        "ip": "",
        "title": utils.get_title(body),
        "status": response.status_code,
        "headers": utils.get_headers(response),
        "http_server": response.headers.get("Server", ""),
        "body_length": len(body),
        "finger": [],
        "favicon": fetch_favicon(site),
    }

    # "fld" is only present when the hostname could be parsed.
    fld_info = utils.domain_parsed(hostname)
    if fld_info:
        info["fld"] = fld_info["fld"]

    # Use the first returned address; fall back to the hostname itself
    # when the lookup yields nothing.
    resolved = utils.get_ip(hostname)
    info["ip"] = resolved[0] if resolved else hostname

    self.site_info_list.append(info)

    if response.status_code not in (301, 302):
        return

    target = urljoin(site, response.headers.get("Location", ""))
    # Only follow redirects that land underneath the URL just requested;
    # an empty Location resolves back to ``site`` and is skipped.
    if target != site and target.startswith(site):
        self.work(target)
def test_fetch_fingerprint(self):
    """Check fingerprint matching against a live response from baidu.com.

    Four hand-written rules, one per rule field (html, title, headers,
    favicon_hash), are placed ahead of the rules returned by
    ``load_fingerprint()``. The assertions expect at least four hits, with
    the first and fourth results equal to the names of the first and
    fourth hand-written rules.

    NOTE(review): requests https://www.baidu.com/ via ``utils.http_req``,
    so this presumably needs network access — confirm before running in CI.
    """
    site = "https://www.baidu.com/"
    conn = utils.http_req(site)
    headers = utils.get_headers(conn)
    title = utils.get_title(conn.content)

    finger_list = [
        {
            "name": "百度测试",
            "rule": {
                "html": ["百度"],
                "title": [],
                "headers": [],
                "favicon_hash": [],
            },
        },
        {
            "name": "百度测试2",
            "rule": {
                "html": [],
                "title": ["百度222222", "百度"],
                "headers": [],
                "favicon_hash": [],
            },
        },
        {
            "name": "百度测试3",
            "rule": {
                "html": [],
                "title": [],
                "headers": ["TTTBAIDUIDTTT", "BAIDUID"],
                "favicon_hash": [],
            },
        },
        {
            "name": "百度测试4",
            "rule": {
                "html": [],
                "title": [],
                "headers": [],
                "favicon_hash": [789, 123456],
            },
        },
    ]
    finger_list.extend(load_fingerprint())

    # favicon_hash=789 is chosen to hit the fourth hand-written rule.
    result = fetch_fingerprint(
        content=conn.content,
        headers=headers,
        title=title,
        favicon_hash=789,
        finger_list=finger_list,
    )

    # Dedicated assertions report the compared values on failure, unlike
    # assertTrue(<comparison>), which only reports "False is not true".
    self.assertGreaterEqual(len(result), 4)
    self.assertEqual(result[0], finger_list[0]["name"])
    self.assertEqual(result[3], finger_list[3]["name"])