def audit(self):
    method = self.requests.command  # request method: GET or POST
    headers = self.requests.get_headers()  # request headers (dict)
    url = self.build_url()  # full request URL
    data = self.requests.get_body_data().decode()  # POST data
    resp_data = self.response.get_body_data()  # response body (bytes)
    resp_str = self.response.get_body_str()  # response body (str, auto-decoded)
    resp_headers = self.response.get_headers()  # response headers (dict)

    # Collect every parent directory of the current URL and of all links
    # found in the response body.
    urls = set(get_parent_paths(url))
    for link in get_links(resp_str, url, True):
        urls |= set(get_parent_paths(link))

    # Markers that identify an auto-generated directory-listing page.
    flag_list = [
        "directory listing for",
        "<title>directory",
        "<head><title>index of",
        '<table summary="directory listing"',
        'last modified</a>',
    ]
    for p in urls:
        if Share.in_url(p):  # already probed by another plugin/thread
            continue
        Share.add_url(p)
        try:
            r = requests.get(p, headers=headers)
            for flag in flag_list:
                if flag in r.text.lower():
                    out.success(p, self.name)
                    break
        except Exception:
            pass
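The markers above come from the index pages that common servers generate. As a sanity check, here is a standalone sketch of the same matching logic against a hypothetical Apache-style autoindex page (the sample HTML is invented for illustration, not scanner output):

    # Hypothetical autoindex page, for illustration only.
    sample = """<html><head><title>Index of /backup</title></head>
    <body><h1>Index of /backup</h1>
    <table><tr><th>Name</th><th><a href="?C=M;O=A">Last modified</a></th></tr>
    </table></body></html>"""

    flag_list = ["directory listing for", "<title>directory",
                 "<head><title>index of", 'last modified</a>']

    print(any(flag in sample.lower() for flag in flag_list))  # True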
def audit(self):
    method = self.requests.command  # request method: GET or POST
    headers = self.requests.get_headers()  # request headers (dict)
    url = self.build_url()  # full request URL
    data = self.requests.get_body_data().decode()  # POST data
    resp_data = self.response.get_body_data()  # response body (bytes)
    resp_str = self.response.get_body_str()  # response body (str, auto-decoded)
    resp_headers = self.response.get_headers()  # response headers (dict)

    # Collect every parent directory of the current URL and of all links
    # found in the response body.
    urls = set(get_parent_paths(url))
    for link in get_links(resp_str, url, True):
        urls |= set(get_parent_paths(link))

    # Path to probe -> keyword that confirms the leak.
    flags = {
        "/.svn/all-wcprops": "svn:wc:ra_dav:version-url",
        "/.git/config": "repositoryformatversion",
    }
    for p in urls:
        for path, keyword in flags.items():
            target = p.rstrip('/') + path
            if Share.in_url(target):  # already probed
                continue
            Share.add_url(target)
            try:
                r = requests.get(target, headers=headers)
                if keyword in r.text:
                    out.success(target, self.name)
            except Exception:
                pass
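Both keywords are stable fingerprints: every Git repository's config begins with a [core] section containing repositoryformatversion, and the all-wcprops file of an SVN 1.6-era working copy embeds svn:wc:ra_dav:version-url for each entry. For reference, a freshly initialized .git/config typically looks like this (exact fields vary by Git version and platform):

    [core]
        repositoryformatversion = 0
        filemode = true
        bare = false
        logallrefupdates = true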
def audit(self):
    method = self.requests.command  # request method: GET or POST
    headers = self.requests.get_headers()  # request headers (dict)
    url = self.build_url()  # full request URL
    data = self.requests.get_body_data().decode()  # POST data
    resp_data = self.response.get_body_data()  # response body (bytes)
    resp_str = self.response.get_body_str()  # response body (str, auto-decoded)
    resp_headers = self.response.get_headers()  # response headers (dict)

    # Collect every parent directory of the current URL and of all links
    # found in the response body.
    urls = set(get_parent_paths(url))
    for link in get_links(resp_str, url, True):
        urls |= set(get_parent_paths(link))

    for p in urls:
        success = []
        for suffix in self.file():  # candidate sensitive/backup file names
            target = p.rstrip('/') + suffix
            if Share.in_url(target):  # already probed
                continue
            Share.add_url(target)
            try:
                r = requests.get(target, headers=headers)
                if r.status_code == 200:
                    # The original stored the length under "code" but read it
                    # back as "len"; use one consistent key.
                    success.append({"url": target, "length": len(r.text)})
            except Exception:
                pass

        if len(success) < 5:
            # Few hits: report them all.
            for hit in success:
                out.success(hit["url"], self.name)
        else:
            # Many hits usually mean a catch-all page. Group by body length
            # and drop any group with more than 3 identical-sized responses.
            result = {}
            for item in success:
                result.setdefault(item["length"], []).append(item["url"])
            for length, hits in result.items():
                if len(hits) > 3:
                    continue
                for u in hits:
                    out.success(u, self.name)
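The length-based grouping at the end is a soft-404 filter: a site that answers every probe with the same catch-all 200 page produces many hits with identical body lengths, which would otherwise flood the output. A standalone illustration with hypothetical results:

    # Hypothetical probe results, reusing the grouping heuristic above.
    success = [
        {"url": "http://example.com/a.bak", "length": 512},
        {"url": "http://example.com/b.bak", "length": 512},
        {"url": "http://example.com/c.bak", "length": 512},
        {"url": "http://example.com/d.bak", "length": 512},
        {"url": "http://example.com/www.zip", "length": 48213},
    ]
    result = {}
    for item in success:
        result.setdefault(item["length"], []).append(item["url"])
    for length, hits in result.items():
        if len(hits) > 3:  # identical sizes suggest a catch-all page
            continue
        for u in hits:
            print(u)  # only http://example.com/www.zip is printed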
def test_get_parent_paths(self):
    url = "https://github.com/w-digital-scanner/w9scan/blob/master/plugins/spider_file/bcrpscan.py"
    p = urlparse(url)
    r = get_parent_paths(p.path, False)
    d = [
        '/w-digital-scanner/w9scan/blob/master/plugins/spider_file/',
        '/w-digital-scanner/w9scan/blob/master/plugins/',
        '/w-digital-scanner/w9scan/blob/master/',
        '/w-digital-scanner/w9scan/blob/',
        '/w-digital-scanner/w9scan/',
        '/w-digital-scanner/',
        '/',
    ]
    self.assertEqual(r, d)

    r = get_parent_paths(url, True)
    d2 = [
        'https://github.com/w-digital-scanner/w9scan/blob/master/plugins/spider_file/',
        'https://github.com/w-digital-scanner/w9scan/blob/master/plugins/',
        'https://github.com/w-digital-scanner/w9scan/blob/master/',
        'https://github.com/w-digital-scanner/w9scan/blob/',
        'https://github.com/w-digital-scanner/w9scan/',
        'https://github.com/w-digital-scanner/',
        'https://github.com/',
    ]
    self.assertEqual(r, d2)
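The test pins down the expected behavior well enough to sketch an implementation: walk upward by stripping one trailing path segment at a time, optionally keeping the scheme and host. This is a minimal sketch inferred from the test, not necessarily the scanner's actual code:

    from urllib.parse import urlparse

    def get_parent_paths(path, domain=True):
        """Return every parent directory of `path`, deepest first.

        With domain=True, `path` is a full URL and results keep scheme+host;
        with domain=False, it is treated as a bare path.
        """
        netloc = ""
        if domain:
            p = urlparse(path)
            netloc = "{}://{}".format(p.scheme, p.netloc)
            path = p.path
        paths = []
        if not path or path[0] != '/':
            return paths
        # Walk up: /a/b/c.py -> /a/b/ -> /a/ -> /
        path = path.rstrip('/')
        while path:
            path = path[:path.rindex('/') + 1]
            paths.append(netloc + path)
            path = path.rstrip('/')
        return paths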
def audit(self):
    method = self.requests.command  # request method: GET or POST
    headers = self.requests.get_headers()  # request headers (dict)
    url = self.build_url()  # full request URL
    post_data = self.requests.get_body_data().decode(errors='ignore')  # POST data
    resp_data = self.response.get_body_data()  # response body (bytes)
    resp_str = self.response.get_body_str()  # response body (str, auto-decoded)
    resp_headers = self.response.get_headers()  # response headers (dict)

    encoding = self.response.decoding or 'utf-8'

    p = self.requests.urlparse = urlparse(url)
    netloc = self.requests.netloc = "{}://{}{}".format(p.scheme, p.netloc, p.path)
    self.requests.tld = get_fld(netloc, fix_protocol=True, fail_silently=True)

    # Parse query-string and cookie parameters.
    data = unquote(p.query, encoding)
    self.requests.params = paramToDict(data, place=PLACE.GET)
    if "cookie" in headers:
        self.requests.cookies = paramToDict(headers["cookie"], place=PLACE.COOKIE)

    if method == "POST":
        post_data = unquote(post_data, encoding)
        if re.search(r'([^=]+)=([^%s]+%s?|\Z)' % (DEFAULT_GET_POST_DELIMITER, DEFAULT_GET_POST_DELIMITER), post_data):
            self.requests.post_hint = POST_HINT.NORMAL
            self.requests.post_data = paramToDict(post_data, place=PLACE.POST, hint=self.requests.post_hint)
        elif re.search(JSON_RECOGNITION_REGEX, post_data):
            self.requests.post_hint = POST_HINT.JSON
            self.requests.post_data = paramToDict(post_data, place=PLACE.POST, hint=self.requests.post_hint)
        elif re.search(XML_RECOGNITION_REGEX, post_data):
            self.requests.post_hint = POST_HINT.XML
        elif re.search(JSON_LIKE_RECOGNITION_REGEX, post_data):
            self.requests.post_hint = POST_HINT.JSON_LIKE
        elif re.search(ARRAY_LIKE_RECOGNITION_REGEX, post_data):
            self.requests.post_hint = POST_HINT.ARRAY_LIKE
            self.requests.post_data = paramToDict(post_data, place=PLACE.POST, hint=self.requests.post_hint)
        elif re.search(MULTIPART_RECOGNITION_REGEX, post_data):
            self.requests.post_hint = POST_HINT.MULTIPART

        # Only NORMAL, JSON and ARRAY-LIKE bodies can be auto-recognized and
        # converted into a parameter dict.
        if self.requests.post_hint in (POST_HINT.NORMAL, POST_HINT.JSON, POST_HINT.ARRAY_LIKE):
            if KB["spiderset"].add(method + url + ''.join(self.requests.post_data), 'PostScan'):
                task_push('PostScan', self.requests, self.response)
        elif self.requests.post_hint is None:
            print("Failed to recognize the POST data format")
    elif method == "GET":
        if KB["spiderset"].add(method + url, 'PerFile'):
            task_push('PerFile', self.requests, self.response)

    # Send PerScheme
    domain = "{}://{}".format(p.scheme, p.netloc)
    if KB["spiderset"].add('GET' + domain, 'PerScheme'):
        task_push('PerScheme', self.requests, self.response)

    # Collect links from the response body.
    links = get_links(resp_str, url, True)
    for link in set(links):
        # Skip links that look like logout actions.
        if any(item in link.lower() for item in logoutParams):
            continue
        # Deduplicate.
        if not KB["spiderset"].add('GET' + link, 'get_links'):
            continue
        try:
            # Refuse bodies larger than 5 MB.
            r = requests.head(link, headers=headers)
            if int(r.headers.get("Content-Length", 0)) > 1024 * 1024 * 5:
                raise Exception("length > 5M")
            r = requests.get(link, headers=headers)
            req = FakeReq(link, headers)
            resp = FakeResp(r)
        except Exception:
            continue
        if KB["spiderset"].add('GET' + resp._url, 'PerFile'):
            task_push('PerFile', req, resp)

    # Collect parent directories of the current URL and of every link.
    urls = set(get_parent_paths(url))
    for link in set(links):
        urls |= set(get_parent_paths(link))
    for i in urls:
        if not KB["spiderset"].add('GET' + i, 'get_link_directory'):
            continue
        try:
            r = requests.get(i, headers=headers)
            req = FakeReq(i, headers)
            resp = FakeResp(r)
        except Exception:
            continue
        if KB["spiderset"].add('GET' + resp._url, 'PerFolder'):
            task_push('PerFolder', req, resp)
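Every task_push above is gated by KB["spiderset"].add(key, scope), which evidently returns True only the first time a key is seen within a scope. A minimal sketch of such a deduplication set (hypothetical; the scanner's real implementation may differ, e.g. by hashing keys or bounding memory):

    import threading
    from collections import defaultdict

    class SpiderSet:
        """Per-scope seen-set: add() returns True only for unseen keys."""

        def __init__(self):
            self._lock = threading.Lock()  # plugins run concurrently
            self._seen = defaultdict(set)

        def add(self, key, scope):
            with self._lock:
                if key in self._seen[scope]:
                    return False
                self._seen[scope].add(key)
                return True

    # KB could then be initialized as: KB = {"spiderset": SpiderSet()}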