Example #1
File: BBScan.py  Project: sry309/BBScan
    def init_final(self):
        try:
            if self.conn_pool:
                self.conn_pool.close()
        except Exception as e:
            pass

        if (self.scheme == 'http' and self.port == 80) or (self.scheme == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.scheme, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.scheme, self.host, self.port)

        if self.has_http:
            self.print_msg('Scan %s' % self.base_url)
        else:
            msg = 'Scan %s:%s' % (self.host, self.port) if self.port else 'Scan %s' % self.host
            self.print_msg(msg)

        if self.has_http:
            if self.scheme == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t,
                                               headers=config.default_headers)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t,
                                              headers=config.default_headers)
            if self.args.require_index_doc:
                self.crawl('/', do_not_process_links=True)

        if self.no_scripts != 1:  # not a duplicate target created by an 80/443 redirect, so it need not be scanned twice
            # scripts-only mode enabled globally, or scripts not disabled for this target
            if self.args.scripts_only or not self.no_scripts:
                for _ in self.user_scripts:
                    self.url_queue.put((_, '/'))

        if not self.has_http or self.args.scripts_only:  # no HTTP service found, or scripts-only scan requested
            return

        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
        else:
            self.check_404_existence()
        if self._404_status == -1:
            self.print_msg('[Warning] HTTP 404 check failed <%s:%s>' %
                           (self.host, self.port))
        elif self._404_status != 404:
            self.print_msg('[Warning] %s has no HTTP 404.' % self.base_url)
        _path, _depth = cal_depth(self, self.path)

        self.enqueue('/')
        if _path != '/' and not self.log_file:
            self.enqueue(_path)
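
All four examples build self.base_url the same way: the port is omitted from the URL whenever it is the scheme's default (80 for http, 443 for https). A minimal standalone sketch of that convention; the helper name build_base_url is illustrative and not part of BBScan:

def build_base_url(scheme, host, port):
    # Drop the port for scheme-default ports, mirroring the checks in the examples.
    if (scheme == 'http' and port == 80) or (scheme == 'https' and port == 443):
        return '%s://%s' % (scheme, host)
    return '%s://%s:%s' % (scheme, host, port)

# build_base_url('https', 'example.com', 443)  -> 'https://example.com'
# build_base_url('http', 'example.com', 8080)  -> 'http://example.com:8080'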
Example #2
    def init_final(self):
        try:
            if self.conn_pool:
                self.conn_pool.close()
        except Exception as e:
            pass
        default_port = 443 if self.schema.lower() == 'https' else 80
        self.host, self.port = self.host.split(
            ':') if self.host.find(':') > 0 else (self.host, default_port)
        self.port = int(self.port)
        if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.schema, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

        is_port_open = self.is_port_open()
        if is_port_open:
            if self.schema == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t * 2,
                                               headers=HEADERS)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t * 2,
                                              headers=HEADERS)

        if self.args.scripts_only or (not is_port_open
                                      and not self.args.no_scripts):
            for _ in self.user_scripts:
                self.url_queue.put((_, '/'))
            print_msg('Scan with scripts: %s' % self.host)
            return

        if not is_port_open:
            return

        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
            self.has_status_404 = True
        else:
            self.check_404_existence()
        if self._404_status == -1:
            print_msg('[Warning] HTTP 404 check failed <%s:%s>' %
                      (self.host, self.port))
        elif not self.has_status_404:
            print_msg('[Warning] %s has no HTTP 404.' % self.base_url)
        _path, _depth = cal_depth(self, self.path)
        self.enqueue('/')
        self.enqueue(_path)
        if not self.args.no_crawl and not self.log_file:
            self.crawl(_path)
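
Examples #2 through #4 start by splitting an optional host:port pair and falling back to the scheme's default port. The same parsing step in isolation, with split_host_port as an illustrative name only:

def split_host_port(schema, host):
    # 443 for https, 80 for anything else, exactly as in the examples above.
    default_port = 443 if schema.lower() == 'https' else 80
    host, port = host.split(':') if host.find(':') > 0 else (host, default_port)
    return host, int(port)

# split_host_port('https', 'example.com')     -> ('example.com', 443)
# split_host_port('http', 'example.com:8080') -> ('example.com', 8080)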
Example #3
File: BBScan.py  Project: 5alt/BBScan
    def init_final(self):
        try:
            self.conn_pool.close()
        except:
            pass
        default_port = 443 if self.schema.lower() == 'https' else 80
        self.host, self.port = self.host.split(
            ':') if self.host.find(':') > 0 else (self.host, default_port)
        self.port = int(self.port)
        if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.schema, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

        is_port_open = self.is_port_open()
        if is_port_open:
            if self.schema == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t * 2,
                                               headers=headers)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t * 2,
                                              headers=headers)

        if not is_port_open:
            return

        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
            self.has_404 = True
        else:
            self.check_404()  # check existence of HTTP 404
        if not self.has_404:
            print_msg('[Warning] %s has no HTTP 404.' % self.host)

        self.request_index(self.path)
        self.gather_info()

        _path, _depth = cal_depth(self, self.path)
        self._enqueue('/')
        self._enqueue(_path)
        if not self.args.no_crawl and not self.log_file:
            self.crawl_index()
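
check_404() in this example and check_404_existence() in the others implement the same idea: request a path that should not exist, then remember the status code and the body length so that later responses which merely repeat the error page can be filtered out. A self-contained sketch of that fingerprinting step, using the requests library instead of BBScan's own connection pool:

import requests  # stand-in client; the real code goes through its own connection pool

def fingerprint_404(base_url, timeout=10):
    # Probe a path that should not exist; a well-behaved server answers 404.
    try:
        resp = requests.get(base_url + '/BBScan-404-existence-check',
                            timeout=timeout, allow_redirects=False)
    except requests.RequestException:
        return -1, 0  # check failed, mirrors _404_status = -1 in the examples
    # A non-404 status means soft-404 pages must later be told apart by length.
    return resp.status_code, len(resp.text)

# status, len_404_doc = fingerprint_404('http://example.com:8080')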
Example #4
class Scanner(object):
    def __init__(self, timeout=600, args=None):
        self.args = args
        self.start_time = time.time()
        self.time_out = timeout
        self.links_limit = 100  # max number of folders to scan

        self._init_rules()
        self._init_scripts()

        self.url_queue = Queue.Queue()  # all urls to scan
        self.urls_processed = set()  # processed urls
        self.urls_enqueued = set()  # entered queue urls
        self.urls_crawled = set()

        self.lock = threading.Lock()
        self.results = {}
        self.log_file = None
        self._404_status = -1
        self.conn_pool = None
        self.index_status, self.index_headers, self.index_html_doc = None, {}, ''
        self.url = ''
        self.schema, self.host, self.port, self.path = None, None, None, None
        self.domain_sub = self.base_url = ''
        self.has_status_404 = True
        self.max_depth = 0
        self.len_404_doc = 0

    # reset scanner
    def reset_scanner(self):
        self.start_time = time.time()
        self.url_queue.queue.clear()
        self.urls_processed.clear()
        self.urls_enqueued.clear()
        self.urls_crawled.clear()
        self.results.clear()
        self.log_file = None
        self._404_status = -1
        self.conn_pool = None
        self.index_status, self.index_headers, self.index_html_doc = None, {}, ''

    # scan from a given URL
    def init_from_url(self, url):
        self.reset_scanner()
        self.url = 'http://' + url if url.find('://') < 0 else url
        self.schema, self.host, self.path = parse_url(url)
        self.domain_sub = get_domain_sub(self.host)
        self.init_final()

    def init_from_log_file(self, log_file):
        self.reset_scanner()
        self.log_file = log_file
        self.schema, self.host, self.path = self._parse_url_from_file()
        self.domain_sub = get_domain_sub(self.host)
        if self.host:
            self.load_all_urls_from_log_file()
            self.init_final()
            return True
        else:
            host = os.path.basename(log_file).replace('.log', '')
            try:
                socket.gethostbyname(host)
                self.init_from_url(host)
                return True
            except Exception as e:
                print_msg('[ERROR] Invalid host from log name: %s' % host)
                return False

    #
    def init_final(self):
        try:
            if self.conn_pool:
                self.conn_pool.close()
        except Exception as e:
            pass
        default_port = 443 if self.schema.lower() == 'https' else 80
        self.host, self.port = self.host.split(
            ':') if self.host.find(':') > 0 else (self.host, default_port)
        self.port = int(self.port)
        if (self.schema == 'http' and self.port == 80) or (self.schema == 'https' and self.port == 443):
            self.base_url = '%s://%s' % (self.schema, self.host)
        else:
            self.base_url = '%s://%s:%s' % (self.schema, self.host, self.port)

        is_port_open = self.is_port_open()
        if is_port_open:
            if self.schema == 'https':
                self.conn_pool = HTTPSConnPool(self.host,
                                               port=self.port,
                                               maxsize=self.args.t * 2,
                                               headers=HEADERS)
            else:
                self.conn_pool = HTTPConnPool(self.host,
                                              port=self.port,
                                              maxsize=self.args.t * 2,
                                              headers=HEADERS)
                # 301 redirect to https
                status, headers, html_doc = self.http_request('/')
                location = headers.get('Location', '')
                if status == 301 and location.startswith('https://'):
                    self.base_url = location.rstrip('/')
                    _, loc_host, _ = parse_url(location)
                    port = int(loc_host.split(':')
                               [1]) if loc_host.find(':') > 0 else 443
                    self.conn_pool = HTTPSConnPool(self.host,
                                                   port=port,
                                                   maxsize=self.args.t * 2,
                                                   headers=HEADERS)
                    print_msg('301 redirect: %s' % location)

        if self.args.scripts_only or (not is_port_open
                                      and not self.args.no_scripts):
            for _ in self.user_scripts:
                self.url_queue.put((_, '/'))
            print_msg('Scan with scripts: %s' % self.host)
            return

        if not is_port_open:
            return

        self.max_depth = cal_depth(self, self.path)[1] + 5
        if self.args.no_check404:
            self._404_status = 404
            self.has_status_404 = True
        else:
            self.check_404_existence()
        if self._404_status == -1:
            print_msg('[Warning] HTTP 404 check failed <%s:%s>' %
                      (self.host, self.port))
        elif not self.has_status_404:
            print_msg('[Warning] %s has no HTTP 404.' % self.base_url)
        _path, _depth = cal_depth(self, self.path)
        self.enqueue('/')
        self.enqueue(_path)
        if not self.args.no_crawl and not self.log_file:
            self.crawl(_path)

    def is_port_open(self):
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(5.0)
            if s.connect_ex((self.host, int(self.port))) == 0:
                print_msg('scan web: %s:%s' % (self.host, self.port))
                return True
            else:
                print_msg('[Warning] Fail to connect to %s' % self.base_url)
                return False
        except Exception as e:
            return False
        finally:
            s.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER,
                         struct.pack('ii', 1, 0))
            s.close()

    #
    def _parse_url_from_file(self):
        url = ''
        with open(self.log_file) as infile:
            for _line in infile.xreadlines():
                _line = _line.strip()
                if _line and len(_line.split()) >= 3:
                    url = _line.split()[1]
                    break
        return parse_url(url)

    #
    # load urls from rules/*.txt
    def _init_rules(self):
        self.text_to_find = []
        self.regex_to_find = []
        self.text_to_exclude = []
        self.regex_to_exclude = []
        self.rules_set = set()
        self.rules_set_root_only = set()

        p_tag = re.compile('{tag="(.*?)"}')
        p_status = re.compile(r'{status=(\d{3})}')
        p_content_type = re.compile('{type="(.*?)"}')
        p_content_type_no = re.compile('{type_no="(.*?)"}')

        for rule_file in glob.glob('rules/*.txt'):
            with open(rule_file, 'r') as infile:
                vul_type = os.path.basename(rule_file)[:-4]
                for url in infile.xreadlines():
                    url = url.strip()
                    if url.startswith('/'):
                        _ = p_tag.search(url)
                        tag = _.group(1) if _ else ''

                        _ = p_status.search(url)
                        status = int(_.group(1)) if _ else 0

                        _ = p_content_type.search(url)
                        content_type = _.group(1) if _ else ''

                        _ = p_content_type_no.search(url)
                        content_type_no = _.group(1) if _ else ''

                        root_only = url.find('{root_only}') >= 0

                        rule = (url.split()[0], tag, status, content_type,
                                content_type_no, root_only, vul_type)
                        if root_only:
                            if rule not in self.rules_set_root_only:
                                self.rules_set_root_only.add(rule)
                            else:
                                print_msg('Duplicated root only rule: %s' %
                                          str(rule))
                        else:
                            if rule not in self.rules_set:
                                self.rules_set.add(rule)
                            else:
                                print_msg('Duplicated rule: %s' % str(rule))

        re_text = re.compile('{text="(.*)"}')
        re_regex_text = re.compile('{regex_text="(.*)"}')

        file_path = 'rules/white.list'
        if not os.path.exists(file_path):
            print_msg('[ERROR] File not exist: %s' % file_path)
            return
        for _line in open(file_path):
            _line = _line.strip()
            if not _line or _line.startswith('#'):
                continue
            _m = re_text.search(_line)
            if _m:
                self.text_to_find.append(_m.group(1).decode('utf-8', 'ignore'))
            else:
                _m = re_regex_text.search(_line)
                if _m:
                    self.regex_to_find.append(
                        re.compile(_m.group(1).decode('utf-8', 'ignore')))

        file_path = 'rules/black.list'
        if not os.path.exists(file_path):
            print_msg('[ERROR] File not exist: %s' % file_path)
            return
        for _line in open(file_path):
            _line = _line.strip()
            if not _line or _line.startswith('#'):
                continue
            _m = re_text.search(_line)
            if _m:
                self.text_to_exclude.append(
                    _m.group(1).decode('utf-8', 'ignore'))
            else:
                _m = re_regex_text.search(_line)
                if _m:
                    self.regex_to_exclude.append(
                        re.compile(_m.group(1).decode('utf-8', 'ignore')))
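        # Illustrative (hypothetical) rule and list entries and what the parsing
        # above turns them into:
        #   rules/example.txt:
        #     /backup.zip {status=206} {type="application/octet-stream"} {root_only}
        #     -> ('/backup.zip', '', 206, 'application/octet-stream', '', True, 'example')
        #   rules/white.list:  {text="Index of /"}       -> appended to text_to_find
        #   rules/black.list:  {regex_text="Error 404"}  -> compiled into regex_to_exclude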

    #
    def _init_scripts(self):
        self.user_scripts = []
        if self.args.no_scripts:  # disable user scripts scan
            return
        for _script in glob.glob('scripts/*.py'):
            script_name = os.path.basename(_script).replace('.py', '')
            if script_name.startswith('_'):
                continue
            try:
                self.user_scripts.append(
                    importlib.import_module('scripts.%s' % script_name))
            except Exception as e:
                print_msg('[ERROR] Fail to load script %s' % script_name)

    #
    def http_request(self, url, headers=HEADERS, timeout=30):
        try:
            if not url:
                url = '/'
            # print_msg('request %s' % self.base_url + url)
            resp = self.conn_pool.urlopen('GET',
                                          self.base_url + url,
                                          headers=headers,
                                          redirect=False,
                                          timeout=timeout,
                                          retries=0)
            status = resp.status
            if resp.headers.get('content-type', '').find('text') >= 0 \
                    or resp.headers.get('content-type', '').find('html') >= 0 \
                    or int(resp.headers.get('content-length', '0')) <= 20480:  # 1024 * 20
                html_doc = decode_response_text(resp.data)
            else:
                html_doc = ''

            return status, resp.headers, html_doc
        except Exception as e:
            return -1, {}, ''

    # check existence of HTTP 404
    def check_404_existence(self):
        try:
            try:
                self._404_status, _, html_doc = self.http_request(
                    '/BBScan-404-existence-check')
            except Exception as e:
                print_msg('[Warning] HTTP 404 check failed <%s:%s>' %
                          (self.host, self.port))
                self._404_status, _, html_doc = -1, {}, ''
            if self._404_status == 404:
                self.has_status_404 = True
            else:
                self.has_status_404 = False
                self.len_404_doc = len(html_doc)
        except Exception as e:
            logging.error('[Check_404] Exception %s %s' %
                          (self.base_url, str(e)))

    #
    def enqueue(self, url):
        try:
            url = str(url)
            url_pattern = re.sub(r'\d+', '{num}', url)
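            # e.g. '/article/123/page/4' collapses to '/article/{num}/page/{num}',
            # so URLs that differ only in numeric ids are processed once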
            if url_pattern in self.urls_processed or len(
                    self.urls_processed) >= self.links_limit:
                return False
            else:
                self.urls_processed.add(url_pattern)
            # print_msg('Entered Queue: %s' % url)
            self.crawl(url)
            if self._404_status != -1:  # valid web service
                rule_set_to_process = [
                    self.rules_set, self.rules_set_root_only
                ] if url == '/' else [self.rules_set]
                for rule_set in rule_set_to_process:
                    for _ in rule_set:
                        if _[5] and url != '/':  # root only
                            continue
                        try:
                            full_url = url.rstrip('/') + _[0]
                        except Exception as e:
                            continue
                        if full_url in self.urls_enqueued:
                            continue
                        url_description = {
                            'prefix': url.rstrip('/'),
                            'full_url': full_url
                        }
                        item = (url_description, _[1], _[2], _[3], _[4], _[5],
                                _[6])
                        self.url_queue.put(item)
                        self.urls_enqueued.add(full_url)

            if self.args.full_scan and url.count('/') >= 2:
                self.enqueue('/'.join(url.split('/')[:-2]) +
                             '/')  # sub folder enqueue

            for script in self.user_scripts:
                self.url_queue.put((script, url))
            return True
        except Exception as e:
            print '[_enqueue.exception] %s' % str(e)
            return False

    #
    def crawl(self, path):
        try:
            headers = dict(
                HEADERS,
                Range='bytes=0-204800')  # allowed size increased to 200 kb
            status, headers, html_doc = self.http_request(path,
                                                          headers=headers)
            if path == '/':
                self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc
            if self.index_html_doc:
                soup = BeautifulSoup(html_doc, "html.parser")
                for link in soup.find_all('a'):
                    url = link.get('href', '').strip()
                    if url.startswith('..'):
                        continue
                    if not url.startswith('/') and url.find('//') < 0:
                        url = path + url
                    url, depth = cal_depth(self, url)
                    # print url, depth
                    if depth <= self.max_depth:
                        self.enqueue(url)
                ret = self.find_text(html_doc)
                if ret:
                    if '/' not in self.results:
                        self.results['/'] = []
                    m = re.search('<title>(.*?)</title>', html_doc)
                    title = m.group(1) if m else ''
                    _ = {
                        'status': status,
                        'url': '%s%s' % (self.base_url, path),
                        'title': title,
                        'vul_type': ret[1]
                    }
                    if _ not in self.results['/']:
                        self.results['/'].append(_)

        except Exception as e:
            print_msg('[crawl Exception] %s %s' % (path, str(e)))
            traceback.print_exc()

    #
    def load_all_urls_from_log_file(self):
        try:
            with open(self.log_file) as infile:
                for _line in infile.xreadlines():
                    _ = _line.strip().split()
                    if len(_) == 3 and (_[2].find('^^^200') > 0
                                        or _[2].find('^^^403') > 0
                                        or _[2].find('^^^302') > 0):
                        url, depth = cal_depth(self, _[1])
                        self.enqueue(url)
        except Exception as e:
            print_msg('[load_all_urls_from_log_file] %s' % str(e))

    #
    def find_text(self, html_doc):
        for _text in self.text_to_find:
            if html_doc.find(_text) >= 0:
                return True, 'Found [%s]' % _text
        for _regex in self.regex_to_find:
            if _regex.search(html_doc):
                return True, 'Found Regex [%s]' % _regex.pattern
        return False

    #
    def find_exclude_text(self, html_doc):
        for _text in self.text_to_exclude:
            if html_doc.find(_text) >= 0:
                return True
        for _regex in self.regex_to_exclude:
            if _regex.search(html_doc):
                return True
        return False

    #
    def scan_worker(self):
        while self.url_queue.qsize() > 0:
            if time.time() - self.start_time > self.time_out:
                self.url_queue.queue.clear()
                print_msg('[ERROR] Timed out task: %s' % self.base_url)
                return
            try:
                item = self.url_queue.get(timeout=0.1)
            except Exception as e:
                return
            try:
                if len(item) == 2:  # Script Scan
                    check_func = getattr(item[0], 'do_check')
                    # print_msg('Begin %s %s' % (os.path.basename(item[0].__file__), item[1]))
                    check_func(self, item[1])
                    # print_msg('End %s %s' % (os.path.basename(item[0].__file__), item[1]))
                    continue
                else:
                    url_description, tag, status_to_match, content_type, content_type_no, root_only, vul_type = item
                    prefix = url_description['prefix']
                    url = url_description['full_url']
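                    # '{sub}' below is replaced with get_domain_sub(self.host); a
                    # hypothetical rule like '/{sub}.zip' would probe for a backup
                    # named after the domain, and targets without a sub-domain skip it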

                    if url.find('{sub}') >= 0:
                        if not self.domain_sub:
                            continue
                        url = url.replace('{sub}', self.domain_sub)

            except Exception as e:
                print_msg('[scan_worker.1] %s' % str(e))
                traceback.print_exc()
                continue
            if not item or not url:
                break

            # print_msg('[%s]' % url.strip())
            try:
                status, headers, html_doc = self.http_request(url)
                cur_content_type = headers.get('content-type', '')
                cur_content_length = headers.get('content-length',
                                                 len(html_doc))

                if self.find_exclude_text(html_doc):  # excluded text found
                    continue

                if 0 <= int(cur_content_length) <= 10:  # text too short
                    continue

                if cur_content_type.find('image/') >= 0:  # exclude image
                    continue

                if content_type != 'application/json' and cur_content_type.find('application/json') >= 0 and \
                        not url.endswith('.json'):    # invalid json
                    continue

                if content_type and cur_content_type.find(content_type) < 0 \
                        or content_type_no and cur_content_type.find(content_type_no) >= 0:
                    continue  # content type mismatch

                if tag and html_doc.find(tag) < 0:
                    continue  # tag mismatch

                if self.find_text(html_doc):
                    valid_item = True
                else:
                    # status code check
                    if status_to_match == 206 and status != 206:
                        continue
                    if status_to_match in (200, 206) and status in (200, 206):
                        valid_item = True
                    elif status_to_match and status != status_to_match:
                        continue
                    elif status in (403, 404) and status != status_to_match:
                        continue
                    else:
                        valid_item = True

                    if status == self._404_status and url != '/':
                        len_doc = len(html_doc)
                        len_sum = self.len_404_doc + len_doc
                        if len_sum == 0 or (0.4 <= float(len_doc) / len_sum <=
                                            0.6):
                            continue

                if valid_item:
                    m = re.search('<title>(.*?)</title>', html_doc)
                    title = m.group(1) if m else ''
                    self.lock.acquire()
                    # print '[+] [Prefix:%s] [%s] %s' % (prefix, status, 'http://' + self.host +  url)
                    if prefix not in self.results:
                        self.results[prefix] = []
                    _ = {
                        'status': status,
                        'url': '%s%s' % (self.base_url, url),
                        'title': title,
                        'vul_type': vul_type
                    }
                    if _ not in self.results[prefix]:
                        self.results[prefix].append(_)
                    self.lock.release()
            except Exception as e:
                print_msg('[scan_worker.2][%s] %s' % (url, str(e)))
                traceback.print_exc()

    #
    def scan(self, threads=6):
        try:
            all_threads = []
            for i in range(threads):
                t = threading.Thread(target=self.scan_worker)
                t.start()
                all_threads.append(t)
            for t in all_threads:
                t.join()

            for key in self.results.keys():
                if len(
                        self.results[key]
                ) > 5:  # Over 5 URLs found under this folder, show first only
                    self.results[key] = self.results[key][:1]
            return self.host, self.results
        except Exception as e:
            print '[scan exception] %s' % str(e)
        self.conn_pool.close()
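
For context, a minimal driver sketch for the Scanner class in Example #4. It only uses attributes the code above actually reads (args.t, args.no_scripts, args.scripts_only, args.no_check404, args.no_crawl, args.full_scan) and assumes it runs inside BBScan's own module, where parse_url, get_domain_sub, HEADERS and the connection-pool classes are defined; the real command-line entry point is not shown here and may differ:

from argparse import Namespace

# Hypothetical option object; every field is referenced somewhere in Example #4.
args = Namespace(t=10,               # pool size / worker thread count
                 no_scripts=False,   # load scripts/*.py plugins
                 scripts_only=False,
                 no_check404=False,
                 no_crawl=False,
                 full_scan=False)

scanner = Scanner(timeout=600, args=args)
scanner.init_from_url('www.example.com')      # parse the URL, open the pool, seed the queue
host, results = scanner.scan(threads=args.t)  # scan() returns (self.host, self.results)
for prefix, findings in results.items():
    for item in findings:
        print('%s %s %s' % (item['status'], item['url'], item['vul_type']))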