def getProxy(files):
    logger.log('INFOR', 'Fetching proxy IPs')
    proxyDatas = getPage()
    logger.log('INFOR', f'Fetched {len(proxyDatas)} proxy IPs in total')
    logger.log('INFOR', 'Validating high-quality proxy IPs')
    threads = []
    sem = threading.Semaphore(threadNum)
    try:
        for types, host, port in proxyDatas:
            sem.acquire()
            t = ProxyInfo(types, host, port, sem)
            t.daemon = True
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
    except KeyboardInterrupt:
        pass
    if proxyList:
        logger.log('INFOR', f'Got {len(proxyList)} high-quality proxy IPs')
        with open(files, 'a', encoding="utf-8") as f:
            for p in proxyList:
                f.write(str(p))
                f.write('\n')
    else:
        logger.log('ERROR', 'Failed to fetch proxies online')


def checkProxyFile():
    files = os.path.join(relative_directory, 'proxy.txt')
    if os.path.isfile(files):
        fileTamp = os.stat(files).st_mtime  # last modification time of the file
        timeArray = time.localtime(fileTamp)
        fileTime = time.strftime("%Y%m%d%H%M", timeArray)
        osTime = time.strftime("%Y%m%d%H%M", time.localtime())
        contrast = int(osTime) - int(fileTime)
        # Only re-fetch proxies when the proxy file is older than 15 minutes
        if contrast >= 15:
            os.remove(files)
            getProxy(files)
        else:
            try:
                with open(files, 'r', encoding="utf-8") as f:
                    for pro in f.readlines():
                        p = pro.strip()
                        _proxy = eval(p)
                        proxyList.append(_proxy)
                logger.log('INFOR', f'Loaded {len(proxyList)} high-quality proxy IPs in total')
            except FileNotFoundError as e:
                logger.log('DEBUG', f'{str(e)}')
    else:
        getProxy(files)


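# A minimal sketch of what proxy.txt is expected to contain, assuming each ProxyInfo
# thread appends a requests-style proxies dict to proxyList (getProxy() writes one
# dict per line with str(), and checkProxyFile() reads it back with eval()). The
# address below is a made-up example, not a real proxy:
#
#   {'http': 'http://1.2.3.4:8080', 'https': 'https://1.2.3.4:8080'}

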
def save_fofa(args, _q_fofa, _file):
    no_browser = args.browser
    a_template = fofa_template['html']
    t_general = Template(a_template['general'])
    output_file_suffix = a_template['suffix']
    report_name = 'fofa_%s_%s%s' % (
        os.path.basename(_file).lower().replace('.txt', ''),
        time.strftime('%Y%m%d_%H%M%S', time.localtime()),
        output_file_suffix)
    content = ""
    count = _q_fofa.qsize()
    while _q_fofa.qsize() > 0:
        url, title = _q_fofa.get()
        content += (f"<tr><td></td><td>{title}</td><td></td><td></td>"
                    f"<td><a href='{url}' target='_blank'>{url}</a></td></tr><tr></tr>")
    result = t_general.substitute({
        'version': version,
        'count': count,
        'content': content
    })
    with codecs.open('report/%s' % report_name, 'w', encoding='utf-8') as outFile:
        outFile.write(result)
    logger.log('INFOR', 'The FOFA search results were saved to report/%s' % report_name)
    if no_browser:
        webbrowser.open_new_tab('file:///' + os.path.abspath('report/%s' % report_name))


def check_fofa():
    # When a FOFA API is configured, check whether it is usable
    if fofaApi['email'] and fofaApi['key']:
        logger.log('INFOR', 'Checking the FOFA API...')
        email = fofaApi['email']
        key = fofaApi['key']
        url = "https://fofa.so/api/v1/info/my?email={0}&key={1}".format(email, key)
        try:
            status = requests.get(url, headers=default_headers, timeout=10, verify=False).status_code
            if status != 200:
                logger.log('ERROR', f'Status code {status}, please make sure the fofaApi settings in config/setting.py are correct')
                exit(-1)
            logger.log('INFOR', 'The FOFA API works properly')
            return True
        except requests.exceptions.ReadTimeout as e:
            logger.log('ERROR', f'Request timed out {e}')
            exit(-1)
        except requests.exceptions.ConnectionError as e:
            logger.log('ERROR', f'Connection error {e}')
            exit(-1)
    return False


def ip_to_int(ip):
    if isinstance(ip, int):
        return ip
    try:
        ipv4 = ipaddress.IPv4Address(ip)
    except Exception as e:
        logger.log('ERROR', f'{repr(e)}')
        return 0
    return int(ipv4)


def check_cdn_cidr(ip):
    try:
        ip = ipaddress.ip_address(ip)
    except Exception as e:
        logger.log('DEBUG', f'{e}')
        return False
    for cidr in cdn_ip_cidr:
        if ip in ipaddress.ip_network(cidr):
            return True
    return False


def get_ip_list(url):
    host, scheme = get_host(url)
    try:
        ip = socket.gethostbyname(host)
        # Check whether the resolved IP is an intranet IP and whether it already exists
        if not intranet_ip(ip):
            return ip
        return ip
    except Exception:
        logger.log('ERROR', f'Invalid domain: {url}')
        return 'Invalid'


def run(target, checkcdn, progress_bar, progress):
    flag = False
    targets = []
    ip = get_ip_list(target)
    # Invalid domains are not added to the targets
    if ip == 'Invalid':
        progress.advance(progress_bar)
        return [], ''
    targets.append(target)
    # Whether to run CDN detection
    if checkcdn:
        # Only run CDN detection on domains; skip targets that are IP addresses
        if re.match(
                r".*(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?).*",
                target):
            return [target], target
        data = [{
            'cname': get_cnames([], target),
            'headers': get_headers(target),
            'ip': ip
        }]
        for index, item in enumerate(data):
            cname = item.get('cname')
            if cname:
                if check_cname_keyword(cname):
                    flag = True
                    break
            try:
                headers = item.get('headers')
                if headers:
                    headers = eval(headers).keys()
                    if check_header_key(headers):
                        flag = True
                        break
            except Exception as e:
                logger.log('DEBUG', f'{e}')
            ip_tmp = item.get('ip')
            if check_cdn_cidr(ip_tmp) or check_cdn_asn(ip_tmp):
                flag = True
                break
    progress.advance(progress_bar)
    # If the target is behind a CDN, scan only the URL; otherwise scan both the URL and the IP
    if flag:
        return targets, target
    else:
        targets.append(ip)
        return targets, target


def run(self):
    try:
        with self.process:
            self.target_formatting()  # initialize the FOFA query URLs
            loop = asyncio.get_event_loop()
            loop.run_until_complete(self.fetch_all(loop))  # FOFA search
            self.session.close()
            # From the FOFA results, take the web services and verify that they are reachable
            self.is_life()
    except Exception as e:
        logger.log("ERROR", e)
    return self.life_urls


def check_404_existence(self):
    try:
        try:
            self._404_status, _, html_doc = self.http_request('/test-scan-404-existence-check')
        except Exception as e:
            logger.log('DEBUG', f'HTTP 404 check failed: {self.base_url} {str(e)}')
            self._404_status, _, html_doc = -1, {}, ''
        if self._404_status != 404:
            self.len_404_doc = len(html_doc)
    except Exception as e:
        logger.log('DEBUG', f'[Check_404] Exception {self.base_url} {str(e)}')


def _init_scripts(self):
    self.user_scripts = []
    if self.args.no_scripts:
        # Scripts are disabled globally, nothing to import
        return
    for _script in self.args.script_files:
        # Skip __init__.py
        if _script.startswith('pocs/scripts/__') or _script.startswith('pocs\\scripts\\__'):
            continue
        script_name_origin = os.path.basename(_script)
        script_name = script_name_origin.replace('.py', '')
        try:
            self.user_scripts.append(importlib.import_module('pocs.scripts.%s' % script_name))
        except Exception as e:
            logger.log('ERROR', f'Failed to load script {script_name}, {e}')


def fetch(self, url):
    try:
        self.session.headers = self.headers
        # self.session.proxies = {
        #     "https": "http://127.0.0.1:8080"
        # }
        response = self.session.get(url, timeout=10)
        if response.status_code == 200:
            datas = json.loads(response.text)
            # When the query returned no error
            if not datas['error']:
                self.target_info(datas['results'])
        else:
            logger.log("ERROR", f'FOFA query failed, status code {response.status_code}')
    except Exception as e:
        logger.log("ERROR", e)


def scan_process(targets):
    target, q_results, args = targets[0], targets[1], targets[2]
    scanner = Scanner(args=args)
    try:
        '''
        target looks like:
        {'scheme': 'https', 'host': '127.0.0.1', 'port': 443, 'path': '',
         'ports_open': [443, 8088], 'script': True, 'has_http': True}
        '''
        # logger.log('INFOR', f'{target}')
        # Process the target info and load rules, scripts, etc.
        ret = scanner.init_from_url(target)
        if ret:
            host, results = scanner.scan()
            if results:
                q_results.put((host, results))
    except Exception as e:
        logger.log('DEBUG', f'{e}')
    finally:
        return target


def print(self):
    """
    InfoScan startup entry: print the banner, then check and configure parameters
    """
    print(SScan_banner)
    dt = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f'[*] Starting InfoScan @ {dt}\n')
    self.check_param()
    self.config_param()
    if self.fofa:
        check_fofa()
    # Fetch high-quality proxy IPs
    # checkProxyFile()
    if self.no_scripts:
        logger.log('INFOR', 'Scripts scan was disabled.')
    if self.require_ports:
        logger.log('INFOR',
                   'Scripts scan port check: %s' % ','.join([str(x) for x in self.require_ports]))


def scan(self):
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        import platform
        if platform.system() != "Windows":
            import uvloop
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
        executor = ThreadPoolExecutor(self.args.t)
        # loop.run_in_executor() takes the blocking thread pool, the function to run
        # and its arguments, which is what lets the blocking workers run under asyncio
        tasks = [
            loop.run_in_executor(executor, self.scan_worker, item)
            for item in self.url_list
        ]
        # Wait for all tasks to complete
        loop.run_until_complete(asyncio.wait(tasks))
        loop.close()
        # After the scan finishes, check how many 403 responses were recorded for self._403_url
        if len(self._403_url) < 20:
            logger.log("DEBUG", f'Running 403 bypass tests against {self.base_url}')
            for resp in self._403_url:
                self.bypass_403(resp)
        # TODO: why? If more than 5 URLs were found under the same folder, keep only the first one
        for key in self.results.keys():
            if len(self.results[key]) > 5:
                self.results[key] = self.results[key][:1]
        return self.base_url.lstrip('unknown://').rstrip(':None'), self.results
    except Exception as e:
        logger.log('ERROR', f'[scan exception] {e}')


def crawl(self, path, do_not_process_links=False):
    try:
        status, headers, html_doc = self.http_request(path)
        if path == '/':
            self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc
        if self.args.crawl and not do_not_process_links and html_doc:
            soup = BeautifulSoup(html_doc, "html.parser")
            # Crawl every <a> tag
            for link in soup.find_all('a'):
                url = link.get('href', '').strip()
                if url.startswith('..'):
                    continue
                if not url.startswith('/') and url.find('//') < 0:  # relative path
                    url = path + url
                url, depth = cal_depth(self, url)
                if depth <= self.max_depth:
                    self.enqueue(url)
        # Match the whitelist rules against the page
        ret = self.find_text(html_doc)
        if ret:
            if '/' not in self.results:
                self.results['/'] = []
            m = re.search('<title>(.*?)</title>', html_doc)
            title = m.group(1) if m else ''
            _ = {
                'status': status,
                'url': '%s%s' % (self.base_url, path),
                'title': title,
                'vul_type': ret[1]
            }
            if _ not in self.results['/']:
                self.results['/'].append(_)
    except Exception as e:
        logger.log('ERROR', f'[crawl Exception] {path} {str(e)}')


def getPage():
    s = requests.Session()
    s.headers = {
        "User-Agent": random.choice(USER_AGENTS),
    }
    s.keep_alive = False
    proxyGit = "https://raw.githubusercontent.com/fate0/proxylist/master/proxy.list"
    proxyPage = "http://proxylist.fatezero.org/proxy.list"
    datasGit = []
    datasPage = []
    try:
        datasGit = s.get(proxyGit).text.split('\n')
    except requests.exceptions.ConnectionError:
        try:
            datasPage = s.get(proxyPage).text.split('\n')
        except requests.exceptions.ConnectionError as e:
            logger.log('ERROR', f'Network timed out, failed to fetch proxies, please retry {e}')
            exit(0)
    datas = datasGit + datasPage
    proxyDatas = []
    for proxy_str in datas:
        if not proxy_str:
            continue
        proxy_json = json.loads(proxy_str)
        host = proxy_json['host']
        port = proxy_json['port']
        types = proxy_json['type']
        if country == "cn":
            if proxy_json['country'] == "CN":
                proxyDatas.append([types, host, port])
        else:
            proxyDatas.append([types, host, port])
    return proxyDatas


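# Each non-empty line of the fate0 proxy.list feed is a standalone JSON object.
# The values below are made up, but the host/port/type/country keys are exactly
# the fields getPage() reads (other fields in the feed are ignored):
#
#   {"host": "1.2.3.4", "port": 8080, "type": "http", "country": "CN"}

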
def check_param(self):
    """
    Check parameters
    """
    if not (self.file or self.dire or self.host):
        msg = '\nTarget missing! One of the following targets should be specified \n' \
              '   \t--f TargetFile \n' \
              '   \t--d TargetDirectory \n' \
              '   \t--host www.host1.com www.host2.com 8.8.8.8'
        logger.log('FATAL', msg)
        exit(-1)
    if self.file and not os.path.isfile(self.file):
        logger.log('FATAL', f'TargetFile not found: {self.file}')
        exit(-1)
    if self.dire and not os.path.isdir(self.dire):
        logger.log('FATAL', f'TargetDirectory not found: {self.dire}')
        exit(-1)
    self.network = int(self.network)
    if not (8 <= self.network <= 32):
        logger.log('FATAL', 'Network should be an integer between 8 and 32')
        exit(-1)


def init_final(self):
    if (self.scheme == 'http' and self.port == 80) or (self.scheme == 'https' and self.port == 443):
        self.base_url = f'{self.scheme}://{self.host}'
    elif self.scheme != 'unknown' and self.host.find(':') >= 0:
        self.base_url = f'{self.scheme}://{self.host}'
    else:
        self.base_url = f'{self.scheme}://{self.host}:{self.port}'
    if not self.has_http:
        logger.log('DEBUG',
                   'NO_HTTP_Scan %s:%s' % (self.host, self.port) if self.port else 'Scan %s' % self.host)
    # Scripts
    if self.script:
        for _ in self.user_scripts:
            self.url_list.append((_, '/'))
    if not self.has_http or self.args.scripts_only:
        # No HTTP service found, or scripts-only scan requested
        return
    # TODO: for a URL like http://www.example.com with path '', max_depth = 1 + 5 = 6
    self.max_depth = cal_depth(self, self.path)[1] + 5
    self.check_404_existence()
    if self._404_status == -1:
        logger.log('DEBUG', 'HTTP 404 check failed %s' % self.base_url)
    elif self._404_status != 404:
        logger.log('DEBUG', f'{self.base_url} has no HTTP 404. {self._404_status}')
    _path, _depth = cal_depth(self, self.path)
    # Enqueue the root path
    self.enqueue('/')


def ctrl_quit(_sig, _frame):
    logger.log('ALERT', 'Scan aborted.')
    os._exit(0)


def read_rules(rule_files):
    text_to_find = []
    regex_to_find = []
    text_to_exclude = []
    regex_to_exclude = []
    rules_set = set()
    rules_set_root_only = set()
    p_tag = re.compile('{tag="(.*?)"}')
    p_status = re.compile(r'{status=(\d{3})}')
    p_content_type = re.compile('{type="(.*?)"}')
    p_content_type_no = re.compile('{type_no="(.*?)"}')
    _files = rule_files
    # Read the rule files
    for rule_file in _files:
        with open(rule_file, 'r', encoding='utf-8') as infile:
            vul_type = os.path.basename(rule_file)[:-4]
            for url in infile.readlines():
                url = url.strip()
                if url.startswith('/'):
                    _ = p_tag.search(url)
                    tag = _.group(1) if _ else ''  # empty when there is no tag field
                    _ = p_status.search(url)
                    status = int(_.group(1)) if _ else 0
                    _ = p_content_type.search(url)
                    content_type = _.group(1) if _ else ''
                    _ = p_content_type_no.search(url)
                    content_type_no = _.group(1) if _ else ''
                    root_only = url.find('{root_only}') >= 0
                    rule = (url.split()[0], tag, status, content_type, content_type_no, root_only, vul_type)
                    if root_only:
                        if rule not in rules_set_root_only:
                            rules_set_root_only.add(rule)
                        else:
                            logger.log('ERROR', f'Duplicated root only rule: {rule}')
                    else:
                        if rule not in rules_set:
                            rules_set.add(rule)
                        else:
                            logger.log('ERROR', f'Duplicated rule: {rule}')
    # Read the text match whitelist / blacklist
    re_text = re.compile('{text="(.*)"}')
    re_regex_text = re.compile('{regex_text="(.*)"}')
    white_file_path = 'pocs/rules/white.list'
    if not os.path.exists(white_file_path):
        logger.log('ERROR', f'File does not exist: {white_file_path}')
        return
    for _line in open(white_file_path, 'r', encoding='utf-8'):
        _line = _line.strip()
        if not _line or _line.startswith('#'):
            continue
        _m = re_text.search(_line)
        if _m:
            text_to_find.append(_m.group(1))
        else:
            _m = re_regex_text.search(_line)
            if _m:
                regex_to_find.append(re.compile(_m.group(1)))
    black_file_path = 'pocs/rules/black.list'
    if not os.path.exists(black_file_path):
        logger.log('ERROR', f'File does not exist: {black_file_path}')
        return
    for _line in open(black_file_path, 'r', encoding='utf-8'):
        _line = _line.strip()
        if not _line or _line.startswith('#'):
            continue
        _m = re_text.search(_line)
        if _m:
            text_to_exclude.append(_m.group(1))
        else:
            _m = re_regex_text.search(_line)
            if _m:
                regex_to_exclude.append(re.compile(_m.group(1)))
    return text_to_find, regex_to_find, text_to_exclude, regex_to_exclude, rules_set, rules_set_root_only


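# A minimal sketch of the rule syntax read_rules() parses. The paths and values are
# hypothetical examples; only the {tag=...}/{status=...}/{type=...}/{type_no=...}/{root_only}
# markers are taken from the parser above:
#
#   /console        {tag="WebLogic"} {status=200} {root_only}
#   /phpinfo.php    {status=200} {type="html"} {type_no="json"}
#
# and a white.list / black.list line looks like:
#
#   {text="Index of /"}
#   {regex_text="error in your SQL syntax"}

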
def scan_worker(self, item):
    if not self.flag and time.time() - self.start_time > self.timeout:
        self.flag = True
    if self.flag:
        self.url_list.clear()
        # self.flag = False
        logger.log('ERROR', 'Timed out task: %s' % self.base_url)
        return
    url, url_description, tag, status_to_match, content_type, content_type_no, \
        root_only, vul_type, prefix = None, None, None, None, None, None, None, None, None
    try:
        if len(item) == 2:
            # Script scan
            check_func = getattr(item[0], 'do_check')
            check_func(self, item[1])
        else:
            # ({'prefix': '', 'full_url': '/trace'}, 'Spring boot serverProperties', 200, '', '', True, 'springboot')
            url_description, tag, status_to_match, content_type, content_type_no, root_only, vul_type = item
            prefix = url_description['prefix']
            url = url_description['full_url']
            '''
            {sub} comes from the rules and builds a path from the current domain:
            for a rule {sub}.sql with the domain baidu.com, the path becomes baidu.sql
            '''
            if url.find('{sub}') >= 0:
                if not self.domain_sub:
                    return
                url = url.replace('{sub}', self.domain_sub)
    except Exception as e:
        logger.log('ERROR', f'[scan_worker.1][{item[0]} {item[1]}] {e}')
        return
    if not item or not url:
        return
    # Start rule-based path probing
    try:
        status, headers, html_doc = self.http_request(url)
        cur_content_type = headers.get('content-type', '')
        cur_content_length = headers.get('content-length', len(html_doc))
        if self.find_exclude_text(html_doc):  # excluded by the blacklist rules
            return
        if 0 <= int(cur_content_length) <= 10:  # text too short
            return
        if cur_content_type.find('image/') >= 0:  # exclude images
            return
        # When a content_type is specified in the rule
        if content_type and content_type != 'json' and cur_content_type.find('json') >= 0:
            return
        # content type mismatch
        if (content_type and cur_content_type.find(content_type) < 0) or \
                (content_type_no and cur_content_type.find(content_type_no) >= 0):
            return
        if tag and html_doc.find(tag) < 0:
            return  # tag mismatch
        # Match the whitelist rules against the page
        if self.find_text(html_doc) and status == 200:
            valid_item = True
        else:
            # status code check
            if status_to_match == 206 and status != 206:
                return
            if status_to_match in (200, 206) and status in (200, 206):
                valid_item = True
            elif status_to_match and status != status_to_match:
                return
            elif status in (403, 404) and status != status_to_match:
                return
            else:
                valid_item = True
            if status == self._404_status and url != '/':
                len_doc = len(html_doc)
                len_sum = self.len_404_doc + len_doc
                if len_sum == 0 or (0.4 <= float(len_doc) / len_sum <= 0.6):
                    return
        if valid_item:
            m = re.search('<title>(.*?)</title>', html_doc)
            title = m.group(1) if m else ''
            if prefix not in self.results:
                self.results[prefix] = []
            _ = {
                'status': status,
                'url': '%s%s' % (self.base_url, url),
                'title': title,
                'vul_type': vul_type
            }
            if _ not in self.results[prefix]:
                self.results[prefix].append(_)
    except Exception:
        logger.log('ERROR', f'[scan_worker.2][{self.base_url}{url}]')
        logger.log('DEBUG', f'{traceback.format_exc()}')


def save_report(args, _q_results, _file, tasks_processed_count):
    no_browser = args.browser
    start_time = time.time()
    a_template = template['html']
    t_general = Template(a_template['general'])
    t_host = Template(a_template['host'])
    t_list_item = Template(a_template['list_item'])
    output_file_suffix = a_template['suffix']
    report_name = '%s_%s%s' % (os.path.basename(_file).lower().replace('.txt', ''),
                               time.strftime('%Y%m%d_%H%M%S', time.localtime()),
                               output_file_suffix)
    html_doc = content = ""
    vulnerable_hosts_count = 0
    console_width = getTerminalSize()[0] - 2
    try:
        while not setting.stop_me or _q_results.qsize() > 0:
            if _q_results.qsize() == 0:
                time.sleep(0.1)
                continue
            while _q_results.qsize() > 0:
                item = _q_results.get()
                if type(item) is str:
                    message = '[%s] %s' % (time.strftime('%H:%M:%S', time.localtime()), item)
                    if args.network <= 22 and (item.startswith('Scan ') or item.startswith('No ports open')):
                        sys.stdout.write(message + (console_width - len(message)) * ' ' + '\r')
                    else:
                        logger.log('INFOR', f'{message}')
                    continue
                host, results = item
                vulnerable_hosts_count += 1
                for key in results.keys():
                    for url in results[key]:
                        vul_type = url['vul_type'] if 'vul_type' in url else ''
                        logger.log('INFOR', f"[+]{url['status'] if url['status'] else ''} {vul_type} {url['url']}")
                _str = ""
                for key in results.keys():
                    for _ in results[key]:
                        _str += t_list_item.substitute(
                            {'status': ' [%s]' % _['status'] if _['status'] else '',
                             'url': _['url'],
                             'title': '[%s]' % _['title'] if _['title'] else '',
                             'vul_type': escape(_['vul_type'].replace('_', ' ')) if 'vul_type' in _ else ''}
                        )
                _str = t_host.substitute({'host': host, 'list': _str})
                content += _str
            cost_time = time.time() - start_time
            cost_min = int(cost_time / 60)
            cost_min = '%s min' % cost_min if cost_min > 0 else ''
            cost_seconds = '%.2f' % (cost_time % 60)
            html_doc = t_general.substitute({
                'version': version,
                'tasks_processed_count': tasks_processed_count,
                'vulnerable_hosts_count': vulnerable_hosts_count,
                'cost_min': cost_min,
                'cost_seconds': cost_seconds,
                'content': content
            })
            with codecs.open('report/%s' % report_name, 'w', encoding='utf-8') as outFile:
                outFile.write(html_doc)
        if html_doc:
            cost_time = time.time() - start_time
            cost_min = int(cost_time / 60)
            cost_min = '%s min ' % cost_min if cost_min > 0 else ''
            cost_seconds = '%.1f' % (cost_time % 60)
            html_doc = t_general.substitute({
                'version': version,
                'tasks_processed_count': tasks_processed_count,
                'vulnerable_hosts_count': vulnerable_hosts_count,
                'cost_min': cost_min,
                'cost_seconds': cost_seconds,
                'content': content
            })
            with codecs.open('report/%s' % report_name, 'w', encoding='utf-8') as outFile:
                outFile.write(html_doc)
            time.sleep(1.0)
            logger.log('INFOR', '%s vulnerable targets on sites in total.' % vulnerable_hosts_count)
            logger.log('INFOR', 'Scan report saved to report/%s' % report_name)
            if no_browser:
                webbrowser.open_new_tab('file:///' + os.path.abspath('report/%s' % report_name))
        else:
            logger.log('INFOR', 'No vulnerabilities found on sites in %s' % _file)
    except IOError as e:
        if e.errno == errno.EPIPE:
            sys.exit(-1)
    except Exception as e:
        logger.log('ERROR', '[save_report_thread Exception] %s %s' % (type(e), str(e)))
        sys.exit(-1)


def enqueue(self, path):
    try:
        path = str(path)
    except Exception as e:
        logger.log('DEBUG', f'{str(e)}')
        return False
    try:
        # In BBScan, digits in the path are replaced with {num}: /asdas12asd >> /asdas{num}asd
        # TODO: unclear what that normalization was for; it is disabled here
        # url_pattern = re.sub(r'\d+', '{num}', path)
        url_pattern = path
        if url_pattern in self.urls_processed or len(self.urls_processed) >= self.links_limit:
            return False
        self.urls_processed.add(url_pattern)
        if self.args.crawl:
            # Crawl the site's <a> tags
            self.crawl(path)
        else:
            self.index_status, self.index_headers, self.index_html_doc = self.http_request('/')
        if self._404_status != -1:  # valid web service
            # Under the site root, all rules are scanned (normal rules plus root_only rules);
            # under other directories only the normal rules are scanned
            rule_set_to_process = [self.rules_set, self.rules_set_root_only] if path == '/' else [self.rules_set]
            # Load the rules
            for rule_set in rule_set_to_process:
                for _ in rule_set:
                    # _ looks like ('/scripts/samples', 'IIS', 200, '', '', True, 'iis')
                    try:
                        full_url = path.rstrip('/') + _[0]
                    except Exception as e:
                        logger.log('DEBUG', f'{str(e)}')
                        continue
                    if full_url in self.urls_enqueued:
                        continue
                    url_description = {'prefix': path.rstrip('/'), 'full_url': full_url}
                    item = (url_description, _[1], _[2], _[3], _[4], _[5], _[6])
                    self.url_list.append(item)
                    self.urls_enqueued.add(full_url)
        # If only a link like /asdd/asd/ was found and the /asdd/ sub folder itself was not,
        # enqueue the parent folder as well
        if path.count('/') >= 2:
            self.enqueue('/'.join(path.split('/')[:-2]) + '/')  # sub folder enqueue
        if path != '/' and not self.no_scripts:
            for script in self.user_scripts:
                self.url_list.append((script, path))
        return True
    except Exception as e:
        logger.log('ERROR', '[_enqueue.exception] %s' % str(e))
        logger.log('DEBUG', f'{traceback.format_exc()}')
        return False


def http_request(self, url, timeout=10):
    try:
        if not url:
            url = '/'
        if not self.session:
            return -1, {}, ''
        # Proxies could be used here, but they were not reliable enough, so they are disabled
        # self.session.proxies = random.choice(proxyList)
        #
        # self.session.proxies = {
        #     "https": "https://127.0.0.1:8080",
        #     "http": "http://127.0.0.1:8080"
        # }
        resp = self.session.get(self.base_url + url,
                                allow_redirects=False,
                                headers=default_headers,
                                timeout=timeout)
        headers = resp.headers
        status = resp.status_code
        # If 502 shows up more than 3 times, drop the site
        if status == 502:
            self.status_502_count += 1
            if self.status_502_count > 3:
                self.url_list.clear()
                try:
                    if self.session:
                        self.session.close()
                except Exception as e:
                    logger.log('DEBUG', f'{str(e)}')
                self.session = None
        # On 301 (moved permanently), fetch the response again from the new location
        if status == 301:
            target = headers.get('Location')
            if not target.startswith('/file:'):
                try:
                    resp = self.session.get(URL(target, encoded=True),
                                            headers=default_headers,
                                            allow_redirects=False,
                                            timeout=timeout,
                                            verify=False)
                    headers = resp.headers
                except Exception as e:
                    logger.log('DEBUG', f'{e}, {target} {self.base_url + url}')
        # Redirects are disabled above, but sometimes the content only appears after following a 302
        if status == 302:
            new_url = headers["Location"]
            if new_url not in self._302_url:
                resp = self.session.get(URL(new_url, encoded=True),
                                        headers=default_headers,
                                        timeout=timeout,
                                        verify=False)
                headers = resp.headers
                self._302_url.add(new_url)
        html_doc = get_html(headers, resp)
        # If the page is not excluded by the blacklist rules, record 403 responses; their total is
        # checked after the scan, and 403 bypass is only attempted when fewer than 20 were seen
        # (more than that is treated as an anti-scan measure)
        if not self.find_exclude_text(html_doc) and status == 403:
            self._403_url.append(url)
        logger.log('DEBUG', f'{self.base_url + url} status: {status}')
        return status, headers, html_doc
    except requests.exceptions.RetryError as e:
        logger.log('DEBUG', f'{str(e)} {self.base_url + url}')
        return -1, {}, ''
    except requests.exceptions.ReadTimeout as e:
        logger.log('DEBUG', f'{str(e)} {self.base_url + url}')
        return -1, {}, ''
    except requests.exceptions.ConnectionError as e:
        logger.log('DEBUG', f'The IP may have been blocked {str(e)} {self.base_url + url}')
        return -1, {}, ''
    except TypeError as e:
        logger.log('DEBUG', f'{str(e)} {self.base_url + url}')
        return -1, {}, ''
    except Exception as e:
        logger.log('DEBUG', f'{str(e)} {self.base_url + url}')
        logger.log('DEBUG', f'{traceback.format_exc()}')
        return -1, {}, ''


def main(self):
    q_targets = multiprocessing.Manager().Queue()  # targets queue
    q_targets_list = []
    q_results = multiprocessing.Manager().Queue()  # results queue
    fofa_result = multiprocessing.Manager().Queue()  # FOFA results queue
    # The scan processes may only start to exit once target preparation is done
    process_targets_done = multiprocessing.Value('i', 0)
    for input_file in self.input_files:
        # Read the targets
        if self.host:
            target_list = self.host.replace(',', ' ').strip().split()
        elif self.file or self.dire:
            with open(input_file, encoding='UTF-8', errors='ignore') as inFile:
                target_list = list(set(inFile.readlines()))
        try:
            import threading
            # Generate the report in real time
            target_count = len(target_list)  # number of targets
            # Generate the report and manage standard output
            threading.Thread(target=save_report, args=(self, q_results, input_file, target_count)).start()
            clear_queue(q_results)
            clear_queue(q_targets)
            process_targets_done.value = 0
            start_time = time.time()
            p = multiprocessing.Process(target=prepare_targets, args=(target_list, q_targets, self, fofa_result))
            p.daemon = True
            p.start()
            # join() blocks the current thread: after p.start(), wait until p finishes before continuing
            p.join()
            logger.log('INFOR',
                       'All preparations have been completed and it took %.1f seconds!' % (time.time() - start_time))
            # The pool size could be based on the number of CPU cores
            # count = multiprocessing.cpu_count()
            count = 30
            # For a small number of targets, create at most 2x as many scan processes
            if len(target_list) * 2 < count:
                count = len(target_list) * 2
            if self.fofa and fofa_result.qsize() > 0:
                # Save the FOFA search results
                save_fofa(self, fofa_result, input_file)
            while True:
                if not q_targets.empty():
                    q_targets_list.append(q_targets.get())
                else:
                    break
            # q_targets.get() looks like:
            # {'scheme': 'https', 'host': '127.0.0.1', 'port': 443, 'path': '', 'ports_open': [80, 443], 'is_neighbor': 0}
            progress = Progress(
                "[progress.description]{task.description}",
                BarColumn(),
                "[progress.percentage]{task.percentage:>3.1f}%",
                "•",
                "[bold green]{task.completed}/{task.total}",
                transient=True,  # hide the progress bar after it reaches 100%
            )
            with progress:
                targets = []
                for target in q_targets_list:
                    tmp = [target, q_results, self]
                    targets.append(tmp)
                progress_bar = progress.add_task("[cyan]Leak detection...", total=len(targets), start=False)
                with multiprocessing.Pool(processes=count) as pool:
                    results = pool.imap_unordered(scan_process, targets)
                    for result in results:
                        # progress.print(result)
                        progress.advance(progress_bar)
                    pool.close()
                    pool.join()
            cost_time = time.time() - start_time
            cost_min = int(cost_time / 60)
            cost_min = '%s min ' % cost_min if cost_min > 0 else ''
            cost_seconds = '%.1f' % (cost_time % 60)
            logger.log('INFOR',
                       f'Scanned {len(q_targets_list)} targets in {cost_min}{cost_seconds} seconds.')
        except Exception as e:
            logger.log('FATAL', '[__main__.exception] %s' % repr(e))
            import traceback
            logger.log('FATAL', traceback.format_exc())
        setting.stop_me = True


def config_param(self):
    """
    Configure parameters
    """
    if self.dire:
        self.dire = glob.glob(self.dire + '/*.txt')
    if self.rule is None:
        self.rule_files = glob.glob('pocs/rules/*.txt')
    else:
        if isinstance(self.rule, str):
            rule = self.rule.split()
        else:
            rule = self.rule
        for rule_name in rule:
            if not rule_name.endswith('.txt'):
                rule_name += '.txt'
            if not os.path.exists('pocs/rules/%s' % rule_name):
                logger.log('FATAL', f'Rule file not found: {rule_name}')
                exit(-1)
            self.rule_files.append(f'pocs/rules/{rule_name}')
    # When the scan is not restricted to scripts only
    if not self.scripts_only:
        self.text_to_find, self.regex_to_find, self.text_to_exclude, self.regex_to_exclude, \
            self.rules_set, self.rules_set_root_only = read_rules(self.rule_files)
    # When scripts are enabled
    if not self.no_scripts:
        if self.script is None:
            self.script_files = glob.glob('pocs/scripts/*.py')
        else:
            if isinstance(self.script, str):
                script = self.script.split()
            else:
                script = self.script
            for script_name in script:
                if not script_name.lower().endswith('.py'):
                    script_name += '.py'
                if not os.path.exists('pocs/scripts/%s' % script_name):
                    logger.log('FATAL', f'Script file not found: {script_name}')
                    exit(-1)
                self.script_files.append('pocs/scripts/%s' % script_name)
        pattern = re.compile(r'ports_to_check.*?=(.*)')
        for _script in self.script_files:
            with open(_script, encoding='UTF-8', errors='ignore') as f:
                content = f.read()
                if content.find('self.http_request') >= 0 or content.find('self.session') >= 0:
                    self.require_no_http = False  # the plugin depends on the HTTP connection pool
                if content.find('self.index_') >= 0:
                    self.require_no_http = False
                    self.require_index_doc = True
                # Collect the ports the plugin requires
                m = pattern.search(content)
                if m:
                    m_str = m.group(1).strip()
                    if m_str.find('#') >= 0:  # strip trailing comments
                        m_str = m_str[:m_str.find('#')]
                    if m_str.find('[') < 0:
                        if int(m_str) not in self.require_ports:
                            self.require_ports.add(int(m_str))
                    else:
                        for port in eval(m_str):
                            if port not in self.require_ports:
                                self.require_ports.add(int(port))
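

# A hypothetical plugin snippet showing the two ports_to_check forms the regex above
# understands: a single port, or a Python list literal; trailing comments are stripped
# before the value is parsed:
#
#   ports_to_check = 6379          # single port
#   ports_to_check = [80, 443]     # list of ports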