Example #1
0
    def work(self, site):
        """Collect basic information about ``site`` and record it.

        Requests ``site`` through ``utils.http_req``, builds a dict describing
        the response (title, status, headers, server banner, body length,
        favicon) and appends it to ``self.site_info_list``.

        The ``ip`` field is filled as follows: when ``utils.domain_parsed``
        returns a truthy value for the hostname, ``fld`` is copied from it and
        ``ip`` becomes the first entry of ``utils.get_ip(hostname)`` (or stays
        ``""`` if that is empty); otherwise the hostname itself is stored as
        ``ip``.

        When the response status is 301 or 302 and the ``Location`` header,
        resolved against ``site``, differs from ``site`` but still starts with
        it, the method recurses on the redirect target.
        """
        _head, hostname, _tail = get_host(site)

        resp = utils.http_req(site)
        body = resp.content
        status = resp.status_code

        record = {
            "site": site,
            "hostname": hostname,
            "ip": "",
            "title": utils.get_title(body),
            "status": status,
            "headers": utils.get_headers(resp),
            "http_server": resp.headers.get("Server", ""),
            "body_length": len(body),
            "finger": [],
            "favicon": fetch_favicon(site),
        }

        parsed = utils.domain_parsed(hostname)
        if not parsed:
            # Hostname could not be parsed as a domain; store it as the ip.
            record["ip"] = hostname
        else:
            record["fld"] = parsed["fld"]
            resolved = utils.get_ip(hostname)
            if resolved:
                record["ip"] = resolved[0]

        self.site_info_list.append(record)

        # Only 301/302 responses are followed.
        if status not in (301, 302):
            return

        target = urljoin(site, resp.headers.get("Location", ""))
        # Follow the redirect only when it stays underneath the current URL
        # and actually points somewhere new.
        if target == site or not target.startswith(site):
            return

        self.work(target)
Example #2
0
 def test_fetch_fingerprint(self):
     """Check fetch_fingerprint against one hand-written rule per rule field.

     Requests https://www.baidu.com/ via ``utils.http_req`` (so the test
     needs network access), then runs ``fetch_fingerprint`` with four local
     fingerprints - one each for ``html``, ``title``, ``headers`` and
     ``favicon_hash`` - followed by everything ``load_fingerprint()`` returns.

     The assertions expect at least four matches, with the first and fourth
     local fingerprint names at positions 0 and 3 of the result
     (presumably the result keeps ``finger_list`` order - confirm against
     ``fetch_fingerprint``).
     """
     site = "https://www.baidu.com/"
     conn = utils.http_req(site)
     headers = utils.get_headers(conn)
     title = utils.get_title(conn.content)
     finger_list = [
         # Rule on the response body only.
         {
             "name": "百度测试",
             "rule": {
                 "html": [
                     "百度"
                 ],
                 "title": [],
                 "headers": [],
                 "favicon_hash": []
             }
         },
         # Rule on the page title only; two candidate values are listed.
         {
             "name": "百度测试2",
             "rule": {
                 "html": [],
                 "title": ["百度222222", "百度"],
                 "headers": [],
                 "favicon_hash": []
             }
         },
         # Rule on the response headers only; two candidate values are listed.
         {
             "name": "百度测试3",
             "rule": {
                 "html": [],
                 "title": [],
                 "headers": ["TTTBAIDUIDTTT", "BAIDUID"],
                 "favicon_hash": []
             }
         },
         # Rule on the favicon hash only; 789 is the hash passed in below.
         {
             "name": "百度测试4",
             "rule": {
                 "html": [],
                 "title": [],
                 "headers": [],
                 "favicon_hash": [789, 123456]
             }
         }
     ]
     finger_list.extend(load_fingerprint())
     result = fetch_fingerprint(content=conn.content, headers=headers,
                                title=title, favicon_hash=789, finger_list=finger_list)
     # Dedicated assert methods report the compared values on failure,
     # unlike assertTrue(a == b), which only reports "False is not true".
     self.assertGreaterEqual(len(result), 4)
     self.assertEqual(result[0], finger_list[0]["name"])
     self.assertEqual(result[3], finger_list[3]["name"])