Example #1
    def init_final(self):
        try:
            if self.conn_pool:
                self.conn_pool.close()
        except Exception:
            pass  # ignore errors when closing a stale connection pool

        # omit the port from base_url when it is the scheme's default (80/443)
        if (self.scheme == 'http' and self.port == 80) or (self.scheme == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.scheme, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.scheme, self.host, self.port)

        if self.has_http:
            self.print_msg('Scan %s' % self.base_url)
        elif self.port:
            self.print_msg('Scan %s:%s' % (self.host, self.port))
        else:
            self.print_msg('Scan %s' % self.host)

        if self.has_http:
            if self.scheme == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t,
                                               headers=config.default_headers)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t,
                                              headers=config.default_headers)
            if self.args.require_index_doc:
                self.crawl('/', do_not_process_links=True)

        if self.no_scripts != 1:  # not a duplicate target created by an 80/443 redirect, which would not need re-scanning
            # scripts-only mode enabled globally, or scripts not disabled for the current target
            if self.args.scripts_only or not self.no_scripts:
                for _ in self.user_scripts:
                    self.url_queue.put((_, '/'))

        if not self.has_http or self.args.scripts_only:  # no HTTP service found, or relying on plugin scan only
            return

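        # determine the crawl depth limit and establish a 404 baseline before queueing paths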
        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
        else:
            self.check_404_existence()
        if self._404_status == -1:
            self.print_msg('[Warning] HTTP 404 check failed <%s:%s>' %
                           (self.host, self.port))
        elif self._404_status != 404:
            self.print_msg('[Warning] %s has no HTTP 404.' % self.base_url)
        _path, _depth = cal_depth(self, self.path)

        self.enqueue('/')
        if _path != '/' and not self.log_file:
            self.enqueue(_path)
Example #2
    def init_final(self):
        try:
            if self.conn_pool:
                self.conn_pool.close()
        except Exception:
            pass
        default_port = 443 if self.schema.lower() == 'https' else 80
        if self.host.find(':') > 0:
            self.host, self.port = self.host.split(':')
        else:
            self.port = default_port
        self.port = int(self.port)
        if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.schema, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

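        # create the connection pool only if the target port is actually reachable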
        is_port_open = self.is_port_open()
        if is_port_open:
            if self.schema == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t * 2,
                                               headers=HEADERS)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t * 2,
                                              headers=HEADERS)

        if self.args.scripts_only or (not is_port_open
                                      and not self.args.no_scripts):
            for _ in self.user_scripts:
                self.url_queue.put((_, '/'))
            print_msg('Scan with scripts: %s' % self.host)
            return

        if not is_port_open:
            return

        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
            self.has_status_404 = True
        else:
            self.check_404_existence()
        if self._404_status == -1:
            print_msg('[Warning] HTTP 404 check failed <%s:%s>' %
                      (self.host, self.port))
        elif not self.has_status_404:
            print_msg('[Warning] %s has no HTTP 404.' % self.base_url)
        _path, _depth = cal_depth(self, self.path)
        self.enqueue('/')
        self.enqueue(_path)
        if not self.args.no_crawl and not self.log_file:
            self.crawl(_path)
Example #3
    def crawl(self, path, do_not_process_links=False):
        try:
            # increase body size to 200 KB
            headers = dict(config.default_headers, Range='bytes=0-204800')
            status, headers, html_doc = self.http_request(path, headers=headers)
            if path == '/':
                self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc
            if not self.args.no_crawl and not do_not_process_links and html_doc:
                soup = BeautifulSoup(html_doc, "html.parser")
                for link in soup.find_all('a'):
                    url = link.get('href', '').strip()
                    if url.startswith('..'):
                        continue
                    if not url.startswith('/') and url.find('//') < 0:   # relative path
                        url = path + url
                    url, depth = cal_depth(self, url)
                    # print url, depth
                    if depth <= self.max_depth:
                        self.enqueue(url)
                # check whether the response body matches any configured text signature
                ret = self.find_text(html_doc)
                if ret:
                    if '/' not in self.results:
                        self.results['/'] = []
                    m = re.search('<title>(.*?)</title>', html_doc)
                    title = m.group(1) if m else ''
                    _ = {'status': status, 'url': '%s%s' % (self.base_url, path), 'title': title, 'vul_type': ret[1]}
                    if _ not in self.results['/']:
                        self.results['/'].append(_)

        except Exception as e:
            self.print_msg('[crawl Exception] %s %s' % (path, str(e)))
Example #4
    def crawl_index(self, path):
        try:
            status, headers, html_doc = self._http_request(path)
            if status != 200:
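                # non-200 response: retry the index with a plain GET, without the Range header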
                try:
                    html_doc = self.conn_pool.urlopen(
                        'GET',
                        self.url,
                        headers=headers_without_range,
                        retries=1).data
                    html_doc = decode_response_text(html_doc)
                except Exception as e:
                    pass
            self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc  # save index content
            soup = BeautifulSoup(html_doc, "html.parser")
            for link in soup.find_all('a'):
                url = link.get('href', '').strip()
                url, depth = cal_depth(self, url)
                if depth <= self.max_depth:
                    self._enqueue(url)
            if self.find_text(html_doc):
                self.results['/'] = []
                m = re.search('<title>(.*?)</title>', html_doc)
                title = m.group(1) if m else ''
                _ = {
                    'status': status,
                    'url': '%s%s' % (self.base_url, path),
                    'title': title
                }
                if _ not in self.results['/']:
                    self.results['/'].append(_)

        except Exception as e:
            logging.error('[crawl_index Exception] %s' % str(e))
            traceback.print_exc()
Example #5
    def init_final(self):
        try:
            self.conn_pool.close()
        except Exception:
            pass
        default_port = 443 if self.schema.lower() == 'https' else 80
        if self.host.find(':') > 0:
            self.host, self.port = self.host.split(':')
        else:
            self.port = default_port
        self.port = int(self.port)
        if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.schema, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

        is_port_open = self.is_port_open()
        if is_port_open:
            if self.schema == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t * 2,
                                               headers=headers)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t * 2,
                                              headers=headers)

        if not is_port_open:
            return

        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
            self.has_404 = True
        else:
            self.check_404()  # check existence of HTTP 404
        if not self.has_404:
            print_msg('[Warning] %s has no HTTP 404.' % self.host)

        self.request_index(self.path)
        self.gather_info()

        _path, _depth = cal_depth(self, self.path)
        self._enqueue('/')
        self._enqueue(_path)
        if not self.args.no_crawl and not self.log_file:
            self.crawl_index()
Example #6
    def load_all_urls_from_log_file(self):
        try:
            with open(self.log_file) as infile:
                for _line in infile:  # iterate the file object directly; xreadlines() is Python 2 only
                    _ = _line.strip().split()
                    if len(_) == 3 and (_[2].find('^^^200') > 0 or _[2].find('^^^403') > 0 or _[2].find('^^^302') > 0):
                        url, depth = cal_depth(self, _[1])
                        self.enqueue(url)
        except Exception as e:
            self.print_msg('[load_all_urls_from_log_file] %s' % str(e))
Example #7
    def load_all_urls_from_log_file(self):
        try:
            with open(self.log_file) as inFile:
                for line in inFile:  # xreadlines() no longer exists in Python 3
                    _ = line.strip().split()
                    if len(_) == 3 and (_[2].find('^^^200') > 0
                                        or _[2].find('^^^403') > 0
                                        or _[2].find('^^^302') > 0):
                        url, depth = cal_depth(self, _[1])
                        self._enqueue(url)
        except Exception as e:
            logging.error('[load_all_urls_from_log_file Exception] %s' %
                          str(e))
            traceback.print_exc()
Example #8
    def crawl_index(self):
        for url in self.index_a_urls:
            url, depth = cal_depth(self, url)
            if depth <= self.max_depth:
                self._enqueue(url)
        if self.find_text(self.index_html_doc):
            self.results['/'] = []
            m = re.search('<title>(.*?)</title>', self.index_html_doc)
            title = m.group(1) if m else ''
            _ = {
                'status': self.index_status,
                'url': '%s%s' % (self.base_url, self.path),
                'title': title
            }
            if _ not in self.results['/']:
                self.results['/'].append(_)
Example #9
    def gather_info(self):
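        # fingerprint the server, language and framework from the saved index response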
        if not self.server:
            self.server = check_server(self.index_headers.get('server', ''))

        if not self.lang:
            self.lang, self.framework = check_lang(self.base_url,
                                                   self.index_headers)

        if self.lang == 'unknown':
            for url in self.index_a_urls:
                url, depth = cal_depth(self, url)
                lang = check_lang_url(url)
                if lang != 'unknown':
                    self.lang = lang
                    break
        self.rewrite = check_rewrite(self.server, self.lang)
Example #10
    def crawl(self, path):
        try:
            headers = dict(
                HEADERS,
                Range='bytes=0-204800')  # allowed size increased to 200 kb
            status, headers, html_doc = self.http_request(path,
                                                          headers=headers)
            if path == '/':
                self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc
            if self.index_html_doc:
                soup = BeautifulSoup(html_doc, "html.parser")
                for link in soup.find_all('a'):
                    url = link.get('href', '').strip()
                    if url.startswith('..'):
                        continue
                    if not url.startswith('/') and url.find('//') < 0:
                        url = path + url
                    url, depth = cal_depth(self, url)
                    # print url, depth
                    if depth <= self.max_depth:
                        self.enqueue(url)
                ret = self.find_text(html_doc)
                if ret:
                    if '/' not in self.results:
                        self.results['/'] = []
                    m = re.search('<title>(.*?)</title>', html_doc)
                    title = m.group(1) if m else ''
                    _ = {
                        'status': status,
                        'url': '%s%s' % (self.base_url, path),
                        'title': title,
                        'vul_type': ret[1]
                    }
                    if _ not in self.results['/']:
                        self.results['/'].append(_)

        except Exception as e:
            print_msg('[crawl Exception] %s %s' % (path, str(e)))
            traceback.print_exc()