def init_final(self):
    try:
        if self.conn_pool:
            self.conn_pool.close()
    except Exception:
        pass

    if (self.scheme == 'http' and self.port == 80) or (self.scheme == 'https' and self.port == 443):
        self.base_url = '%s://%s' % (self.scheme, self.host)
    else:
        self.base_url = '%s://%s:%s' % (self.scheme, self.host, self.port)

    if self.has_http:
        self.print_msg('Scan %s' % self.base_url)
    else:
        self.print_msg('Scan %s:%s' % (self.host, self.port) if self.port else 'Scan %s' % self.host)

    if self.has_http:
        if self.scheme == 'https':
            self.conn_pool = HTTPSConnPool(self.host, port=self.port, maxsize=self.args.t,
                                           headers=config.default_headers)
        else:
            self.conn_pool = HTTPConnPool(self.host, port=self.port, maxsize=self.args.t,
                                          headers=config.default_headers)
        if self.args.require_index_doc:
            self.crawl('/', do_not_process_links=True)

    if self.no_scripts != 1:    # not a duplicate target created by an 80/443 redirect, no need to scan it twice
        # scripts-only mode requested globally, or scripts not disabled for the current target
        if self.args.scripts_only or not self.no_scripts:
            for _ in self.user_scripts:
                self.url_queue.put((_, '/'))

    if not self.has_http or self.args.scripts_only:    # no HTTP service found, or scripts-only scan requested
        return

    self.max_depth = cal_depth(self, self.path)[1] + 5
    if self.args.no_check404:
        self._404_status = 404
    else:
        self.check_404_existence()
    if self._404_status == -1:
        self.print_msg('[Warning] HTTP 404 check failed <%s:%s>' % (self.host, self.port))
    elif self._404_status != 404:
        self.print_msg('[Warning] %s has no HTTP 404.' % self.base_url)

    _path, _depth = cal_depth(self, self.path)
    self.enqueue('/')
    if _path != '/' and not self.log_file:
        self.enqueue(_path)

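# init_final and the crawl methods below all lean on a cal_depth helper that is defined outside
# this section. The sketch below is only an assumption of its contract -- normalize an href into
# a directory path under the scan root and return (path, depth) -- so that checks such as
# `depth <= self.max_depth` read naturally; the project's real implementation may differ.
def cal_depth_sketch(scanner, url):
    # Discard fragments, protocol-relative links, and non-HTTP schemes (treated as "infinitely deep").
    if url.startswith('#') or url.startswith('//') or url.lower().startswith(('javascript:', 'mailto:')):
        return '', 10000
    if url.lower().startswith(('http://', 'https://')):
        base = getattr(scanner, 'base_url', '')
        if base and url.startswith(base):
            url = url[len(base):]        # same-site absolute link: keep only the path
        else:
            return '', 10000             # external link
    # Keep the directory part only, then count path segments as the depth.
    path = url.split('?')[0].split('#')[0]
    path = path[:path.rfind('/') + 1] if '/' in path else '/'
    if not path.startswith('/'):
        path = '/' + path
    return path, path.count('/')
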
def init_final(self):
    try:
        if self.conn_pool:
            self.conn_pool.close()
    except Exception:
        pass

    # Split off an explicit port if one was given, otherwise fall back to the scheme's default.
    default_port = 443 if self.schema.lower() == 'https' else 80
    self.host, self.port = self.host.split(':') if self.host.find(':') > 0 else (self.host, default_port)
    self.port = int(self.port)

    if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
        self.base_url = '%s://%s' % (self.schema, self.host)
    else:
        self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

    is_port_open = self.is_port_open()
    if is_port_open:
        if self.schema == 'https':
            self.conn_pool = HTTPSConnPool(self.host, port=self.port, maxsize=self.args.t * 2, headers=HEADERS)
        else:
            self.conn_pool = HTTPConnPool(self.host, port=self.port, maxsize=self.args.t * 2, headers=HEADERS)

    # Script-only targets: queue the user scripts and return without any dictionary-based scan.
    if self.args.scripts_only or (not is_port_open and not self.args.no_scripts):
        for _ in self.user_scripts:
            self.url_queue.put((_, '/'))
        print_msg('Scan with scripts: %s' % self.host)
        return

    if not is_port_open:
        return

    self.max_depth = cal_depth(self, self.path)[1] + 5
    if self.args.no_check404:
        self._404_status = 404
        self.has_status_404 = True
    else:
        self.check_404_existence()
    if self._404_status == -1:
        print_msg('[Warning] HTTP 404 check failed <%s:%s>' % (self.host, self.port))
    elif not self.has_status_404:
        print_msg('[Warning] %s has no HTTP 404.' % self.base_url)

    _path, _depth = cal_depth(self, self.path)
    self.enqueue('/')
    self.enqueue(_path)
    if not self.args.no_crawl and not self.log_file:
        self.crawl(_path)

def crawl(self, path, do_not_process_links=False):
    try:
        # increase body size to 200 KB
        headers = dict(config.default_headers, Range='bytes=0-204800')
        status, headers, html_doc = self.http_request(path, headers=headers)
        if path == '/':
            self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc
        if not self.args.no_crawl and not do_not_process_links and html_doc:
            soup = BeautifulSoup(html_doc, "html.parser")
            for link in soup.find_all('a'):
                url = link.get('href', '').strip()
                if url.startswith('..'):
                    continue
                if not url.startswith('/') and url.find('//') < 0:    # relative path
                    url = path + url
                url, depth = cal_depth(self, url)
                # print url, depth
                if depth <= self.max_depth:
                    self.enqueue(url)

        ret = self.find_text(html_doc)
        if ret:
            if '/' not in self.results:
                self.results['/'] = []
            m = re.search('<title>(.*?)</title>', html_doc)
            title = m.group(1) if m else ''
            _ = {'status': status, 'url': '%s%s' % (self.base_url, path),
                 'title': title, 'vul_type': ret[1]}
            if _ not in self.results['/']:
                self.results['/'].append(_)
    except Exception as e:
        self.print_msg('[crawl Exception] %s %s' % (path, str(e)))

def crawl_index(self, path):
    try:
        status, headers, html_doc = self._http_request(path)
        if status != 200:
            try:
                html_doc = self.conn_pool.urlopen('GET', self.url,
                                                  headers=headers_without_range, retries=1).data
                html_doc = decode_response_text(html_doc)
            except Exception:
                pass

        self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc    # save index content
        soup = BeautifulSoup(html_doc, "html.parser")
        for link in soup.find_all('a'):
            url = link.get('href', '').strip()
            url, depth = cal_depth(self, url)
            if depth <= self.max_depth:
                self._enqueue(url)

        if self.find_text(html_doc):
            self.results['/'] = []
            m = re.search('<title>(.*?)</title>', html_doc)
            title = m.group(1) if m else ''
            _ = {'status': status, 'url': '%s%s' % (self.base_url, path), 'title': title}
            if _ not in self.results['/']:
                self.results['/'].append(_)
    except Exception as e:
        logging.error('[crawl_index Exception] %s' % str(e))
        traceback.print_exc()

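# crawl_index above calls decode_response_text (and uses a headers_without_range dict) defined
# elsewhere in the project. The sketch below is a minimal assumption of what the decoding step
# does: try a few common charsets, then fall back to a lossy UTF-8 decode. The real helper may
# also honor the charset advertised in the Content-Type header.
def decode_response_text_sketch(data):
    for encoding in ('utf-8', 'gbk', 'gb2312'):
        try:
            return data.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
    return data.decode('utf-8', errors='replace')    # last resort: replace undecodable bytes
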
def init_final(self):
    try:
        self.conn_pool.close()
    except Exception:
        pass

    default_port = 443 if self.schema.lower() == 'https' else 80
    self.host, self.port = self.host.split(':') if self.host.find(':') > 0 else (self.host, default_port)
    self.port = int(self.port)

    if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
        self.base_url = '%s://%s' % (self.schema, self.host)
    else:
        self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

    is_port_open = self.is_port_open()
    if is_port_open:
        if self.schema == 'https':
            self.conn_pool = HTTPSConnPool(self.host, port=self.port, maxsize=self.args.t * 2, headers=headers)
        else:
            self.conn_pool = HTTPConnPool(self.host, port=self.port, maxsize=self.args.t * 2, headers=headers)

    if not is_port_open:
        return

    self.max_depth = cal_depth(self, self.path)[1] + 5
    if self.args.no_check404:
        self._404_status = 404
        self.has_404 = True
    else:
        self.check_404()    # check existence of HTTP 404
    if not self.has_404:
        print_msg('[Warning] %s has no HTTP 404.' % self.host)

    self.request_index(self.path)
    self.gather_info()

    _path, _depth = cal_depth(self, self.path)
    self._enqueue('/')
    self._enqueue(_path)
    if not self.args.no_crawl and not self.log_file:
        self.crawl_index()

def load_all_urls_from_log_file(self):
    try:
        with open(self.log_file) as infile:
            for _line in infile:    # iterate the file directly; xreadlines() no longer exists in Python 3
                _ = _line.strip().split()
                if len(_) == 3 and (_[2].find('^^^200') > 0 or _[2].find('^^^403') > 0 or _[2].find('^^^302') > 0):
                    url, depth = cal_depth(self, _[1])
                    self.enqueue(url)
    except Exception as e:
        self.print_msg('[load_all_urls_from_log_file] %s' % str(e))

def load_all_urls_from_log_file(self):
    try:
        with open(self.log_file) as inFile:
            for line in inFile:    # plain file iteration works on both Python 2 and 3
                _ = line.strip().split()
                if len(_) == 3 and (_[2].find('^^^200') > 0 or _[2].find('^^^403') > 0 or _[2].find('^^^302') > 0):
                    url, depth = cal_depth(self, _[1])
                    self._enqueue(url)
    except Exception as e:
        logging.error('[load_all_urls_from_log_file Exception] %s' % str(e))
        traceback.print_exc()

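# Both load_all_urls_from_log_file variants above expect each log line to split into exactly three
# whitespace-separated fields, with the request path in the second field and a '^^^<status>' marker
# appended to the third. The snippet below only illustrates that assumed shape; the sample line is
# hypothetical, not an excerpt from a real scan log.
def _log_line_format_example():
    sample_line = 'GET /admin/login.php HTTP/1.1^^^200'
    fields = sample_line.strip().split()
    if len(fields) == 3 and any(fields[2].find(tag) > 0 for tag in ('^^^200', '^^^403', '^^^302')):
        print('would enqueue: %s' % fields[1])    # -> would enqueue: /admin/login.php
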
def crawl_index(self):
    for url in self.index_a_urls:
        url, depth = cal_depth(self, url)
        if depth <= self.max_depth:
            self._enqueue(url)

    if self.find_text(self.index_html_doc):
        self.results['/'] = []
        m = re.search('<title>(.*?)</title>', self.index_html_doc)
        title = m.group(1) if m else ''
        _ = {'status': self.index_status, 'url': '%s%s' % (self.base_url, self.path), 'title': title}
        if _ not in self.results['/']:
            self.results['/'].append(_)

def gather_info(self):
    if not self.server:
        self.server = check_server(self.index_headers.get('server', ''))
    if not self.lang:
        self.lang, self.framework = check_lang(self.base_url, self.index_headers)
    if self.lang == 'unknown':
        for url in self.index_a_urls:
            url, depth = cal_depth(self, url)
            lang = check_lang_url(url)
            if lang != 'unknown':
                self.lang = lang
                break
    self.rewrite = check_rewrite(self.server, self.lang)

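# gather_info relies on check_server / check_lang / check_lang_url / check_rewrite helpers that
# live outside this section. The sketch below is one plausible reading of check_lang_url only:
# guess the server-side language from a URL's file extension. The extension table here is an
# assumption for illustration; the project's real helper may use a different mapping entirely.
def check_lang_url_sketch(url):
    path = url.split('?')[0].lower()
    extension_lang_map = {
        '.php': 'php',
        '.asp': 'asp',
        '.aspx': 'asp.net',
        '.jsp': 'java',
        '.do': 'java',
        '.action': 'java',
    }
    for ext, lang in extension_lang_map.items():
        if path.endswith(ext):
            return lang
    return 'unknown'
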
def crawl(self, path):
    try:
        headers = dict(HEADERS, Range='bytes=0-204800')    # allowed size increased to 200 KB
        status, headers, html_doc = self.http_request(path, headers=headers)
        if path == '/':
            self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc
        if self.index_html_doc:
            soup = BeautifulSoup(html_doc, "html.parser")
            for link in soup.find_all('a'):
                url = link.get('href', '').strip()
                if url.startswith('..'):
                    continue
                if not url.startswith('/') and url.find('//') < 0:
                    url = path + url
                url, depth = cal_depth(self, url)
                # print url, depth
                if depth <= self.max_depth:
                    self.enqueue(url)

        ret = self.find_text(html_doc)
        if ret:
            if '/' not in self.results:
                self.results['/'] = []
            m = re.search('<title>(.*?)</title>', html_doc)
            title = m.group(1) if m else ''
            _ = {'status': status, 'url': '%s%s' % (self.base_url, path),
                 'title': title, 'vul_type': ret[1]}
            if _ not in self.results['/']:
                self.results['/'].append(_)
    except Exception as e:
        print_msg('[crawl Exception] %s %s' % (path, str(e)))
        traceback.print_exc()