def checkAllow3XXDir(dirUrl):
    '''
    @description: check whether every request under this directory is answered with a 3XX redirect
    @param {dirUrl: the input directory URL, without a file name such as index.html}
    @return: boolean
    '''
    randomCollect = string.ascii_letters + string.digits
    randomStr1 = ''.join(random.sample(randomCollect, random.randint(20, 30)))
    randomUrl1 = "%s%s.html" % (dirUrl, randomStr1)
    randomStr2 = ''.join(random.sample(randomCollect, random.randint(20, 30)))
    randomUrl2 = "%s%s.html" % (dirUrl, randomStr2)
    # check whether both probes come back as 3XX
    try:
        response1 = requests.get(randomUrl1, allow_redirects=False, timeout=3)
        response2 = requests.get(randomUrl2, allow_redirects=False, timeout=3)
        if response1.status_code == response2.status_code and \
                300 <= response1.status_code < 400:
            location1 = response1.headers.get('Location')
            location2 = response2.headers.get('Location')
            if not location1 or not location2:
                return None
            # special handling of the [;] / [#] / [?] suffixes in the Location header
            location1 = location1.split(";")[0].split("#")[0].split("?")[0]
            location2 = location2.split(";")[0].split("#")[0].split("?")[0]
            # two random pages redirect to the same target, so 3XX hits here are meaningless
            if location1 and location1 == location2:
                return False
    except Exception as e:
        print('[x] error:{}'.format(e))
        tracebackLogger()
        return None
    return True
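# Illustrative semantics of checkAllow3XXDir (a sketch only; the URL below is a
# placeholder and no fixed return value is guaranteed for any real target):
#   checkAllow3XXDir('http://127.0.0.1/admin/')
#   -> False  when two random .html probes both get the same 3XX Location,
#             i.e. the directory blanket-redirects and 3XX hits are meaningless
#   -> True   when the probes are not redirected to one identical target
#   -> None   when a probe fails or carries no usable Location header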
def getSubDirList(url):
    '''
    @description: return every sub-path segment of url
                  http://127.0.0.1/aaa/bbb/ccc/ddd.php -> aaa bbb ccc
    @param {url: the input url}
    @return: list containing every path segment of the input
    '''
    subDirList = []
    # make sure the url carries a scheme
    if not re.match(r'^https?://.+', url):
        url = 'http://%s' % url
    try:
        # try to parse the url
        parsedUrl = urllib.parse.urlparse(url)
        # check the host part
        if parsedUrl.netloc == '':
            return subDirList
        # check host and path; every nested path segment goes into the list
        if parsedUrl.path != '':
            pathList = parsedUrl.path.split('/')
            # everything is a directory
            if url.endswith('/'):
                subDirList.extend(pathList)
            # the last segment is a file
            else:
                subDirList.extend(pathList[:-1])
    except Exception as e:
        print('[x] error:{}'.format(e))
        tracebackLogger()
        # parsing failed, return whatever was collected
        pass
    resultList = []
    for subDir in subDirList:
        if subDir != "":
            resultList.append(subDir)
    return resultList
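# Usage sketch for getSubDirList, matching the docstring example (the URL is
# only an illustration, nothing is requested here):
#   getSubDirList('http://127.0.0.1/aaa/bbb/ccc/ddd.php')
#   -> ['aaa', 'bbb', 'ccc']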
def _get_option(section, option):
    try:
        cf = ConfigParser()
        cf.read(paths.CONFIG_PATH)
        return cf.get(section=section, option=option)
    except Exception:
        tracebackLogger()
        return ''
def loadSingleDict(path):
    '''
    @description: load a single dictionary file
    @param {path: path of the dictionary file}
    @return: list of lines in the file
    '''
    try:
        # load the file
        with open(path) as single_file:
            return single_file.read().splitlines()
    except Exception as e:
        tracebackLogger()
        return []
def loadMultDict(path):
    '''
    @description: load every dictionary file under a directory
    @param {path: path of the dictionary directory}
    @return: list: every line of every dictionary file under the directory
    '''
    tmpList = []
    try:
        for filename in os.listdir(path):
            tmpList.extend(loadSingleDict(os.path.join(path, filename)))
        return tmpList
    except Exception as e:
        tracebackLogger()
        return []
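# Minimal usage sketch for the two dictionary loaders (the paths below are
# placeholders, not the project's real configuration values):
#   wordlist = loadSingleDict('./dict/common.txt')    # one file -> list of lines
#   wordlist += loadMultDict('./dict/extra/')         # whole directory -> merged list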
def worker(self):
    '''
    @description: builds and sends requests for the enumeration loop
    @param {type}
    @return:
    '''
    currentTaskURL = scan.taskQueue.get()
    # the task currently being executed
    scan.nowTask = currentTaskURL
    scan.taskCount += 1
    # custom headers
    headers = {}
    if conf.request_headers:
        for header in conf.request_headers.split(','):
            k, v = header.split('=', 1)
            headers[k] = v
    # custom User-Agent
    if conf.request_header_ua:
        headers['User-Agent'] = conf.request_header_ua
    # custom cookie
    if conf.request_header_cookie:
        headers['Cookie'] = conf.request_header_cookie
    # delay before the request
    if conf.request_delay:
        random_sleep_second = random.randint(0, abs(conf.request_delay))
        time.sleep(random_sleep_second)
    # send the request
    try:
        response = requests.request(conf.request_method,
                                    currentTaskURL,
                                    headers=headers,
                                    timeout=conf.request_timeout,
                                    verify=False,
                                    allow_redirects=conf.redirection_302,
                                    proxies=conf.proxy_server)
        # hand the response over to the result handler
        self.responseHandler(currentTaskURL, response)
    except requests.exceptions.Timeout as e:
        # TODO: handle timeouts
        pass
    except Exception as e:
        print('[x] error:{}'.format(e))
        tracebackLogger()
    finally:
        # update progress
        pass
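# Expected shape of the custom-header option consumed above (an assumption
# derived from the split(',') / split('=', 1) parsing, not a documented format):
#   conf.request_headers = 'X-Forwarded-For=127.0.0.1,Referer=http://127.0.0.1/'
#   -> headers == {'X-Forwarded-For': '127.0.0.1', 'Referer': 'http://127.0.0.1/'}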
def getTargetList(url):
    '''
    @description: return every directory URL derived from url
    @param {url: the input url}
    @return: list containing every directory URL of the input
    '''
    urlList = []
    # make sure the url carries a scheme
    if not re.match(r'^https?://.+', url):
        url = 'http://%s' % url
    try:
        # try to parse the url
        parsedUrl = urllib.parse.urlparse(url)
        # check the host part
        if parsedUrl.netloc == '':
            return urlList
        # check host and path; every nested directory goes into the list
        if parsedUrl.path != '':
            pathList = parsedUrl.path.split('/')
            # decide whether the last segment is a directory or a file
            if pathList[-1] == '' or '.' in pathList[-1]:
                pathList = pathList[:-1]
            # walk the path segments from deepest to shallowest
            for i in range(1, len(pathList))[::-1]:
                targetPath = '/'.join(pathList[1:i + 1])
                target = "%s://%s/%s/" % (parsedUrl.scheme, parsedUrl.netloc, targetPath)
                urlList.append(target)
        # finally add the bare netloc
        target = "%s://%s/" % (parsedUrl.scheme, parsedUrl.netloc)
        urlList.append(target)
    except Exception as e:
        print('[x] error:{}'.format(e))
        tracebackLogger()
        # parsing failed, return whatever was collected
        pass
    return urlList
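# Usage sketch for getTargetList (illustrative URL only, nothing is requested here):
#   getTargetList('http://127.0.0.1/aaa/bbb/ccc/ddd.php')
#   -> ['http://127.0.0.1/aaa/bbb/ccc/',
#       'http://127.0.0.1/aaa/bbb/',
#       'http://127.0.0.1/aaa/',
#       'http://127.0.0.1/']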
def check404Page(url):
    '''
    @description: probe the 404 behaviour of url and collect its fingerprint
    @param {url: the input url}
    @return: e.g. {"isChecked": True, "checkList": [{"status": 404, "crc32": ""}]}
             or   {"isChecked": True, "checkList": [{"status": 404, "crc32": ""}, {"status": 200, "crc32": "xxx"}]}
    '''
    checkResult = {
        "isChecked": False,
        "checkList": [{
            "status": 404,
            "crc32": ""
        }]
    }
    try:
        # request a random page
        randomCollect = string.ascii_letters + string.digits
        randomStr = ''.join(
            random.sample(randomCollect, random.randint(20, 30)))
        randomUrl = "%s%s.html" % (url, randomStr)
        response = requests.get(randomUrl, timeout=3)
        # check the status code
        # a plain 404 needs no extra fingerprint
        if response.status_code == 404:
            checkResult['isChecked'] = True
        # on 200, fingerprint the soft-404 page with a crc32 checksum
        elif response.status_code == 200:
            randomStr_1 = ''.join(
                random.sample(randomCollect, random.randint(20, 30)))
            randomUrl_1 = "%s%s.html" % (url, randomStr_1)
            response_1 = requests.get(randomUrl_1, timeout=3)
            # strip the random strings that may be echoed back in the page
            path = urllib.parse.urlparse(randomUrl).path
            path_1 = urllib.parse.urlparse(randomUrl_1).path
            compText = response.text.replace(path, '').replace(randomStr, '')
            compText_1 = response_1.text.replace(path_1, '').replace(randomStr_1, '')
            if crc32(compText) == crc32(compText_1):
                crc32Str = crc32(compText)
                checkResult['isChecked'] = True
                checkResult['checkList'].append({
                    "status": 200,
                    "crc32": crc32Str
                })
            # TODO: also check for a fixed string in the title?
            else:
                pass
        # other status codes cannot be handled yet; revisit if new patterns show up
        else:
            pass

        # also probe the directory-style 404: some sites answer xxx.html and xxx/ differently
        randomUrl = "%s%s/" % (url, randomStr)
        response = requests.get(randomUrl, timeout=3)
        # check the status code
        # a plain 404 needs no extra fingerprint
        if response.status_code == 404:
            checkResult['isChecked'] = True
        # on 200, fingerprint the soft-404 page with a crc32 checksum
        elif response.status_code == 200:
            randomStr_1 = ''.join(
                random.sample(randomCollect, random.randint(20, 30)))
            randomUrl_1 = "%s%s/" % (url, randomStr_1)
            response_1 = requests.get(randomUrl_1, timeout=3)
            # strip the random strings that may be echoed back in the page
            path = urllib.parse.urlparse(randomUrl).path
            path_1 = urllib.parse.urlparse(randomUrl_1).path
            compText = response.text.replace(path, '').replace(randomStr, '')
            compText_1 = response_1.text.replace(path_1, '').replace(randomStr_1, '')
            if crc32(compText) == crc32(compText_1):
                crc32Str = crc32(compText)
                checkResult['isChecked'] = True
                checkResult['checkList'].append({
                    "status": 200,
                    "crc32": crc32Str
                })
    except Exception as e:
        tracebackLogger()
    return checkResult
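# Minimal self-test sketch, assuming this module's dependencies (requests, the
# project's crc32 helper and tracebackLogger) are importable; the target URL is
# a placeholder and the probe performs live HTTP requests when executed:
if __name__ == '__main__':
    demoTarget = 'http://127.0.0.1/aaa/bbb/ccc/ddd.php'  # hypothetical test target
    for dirUrl in getTargetList(demoTarget):
        fingerprint = check404Page(dirUrl)
        print('[*] %s -> %s' % (dirUrl, fingerprint))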