Example #1
def masscan(target, ports):
    output = os.path.join(PATHS.OUTPUT_PATH,
                          "output_" + str(time.time()) + ".log")
    cmd = "masscan -p {} --rate={} --randomize-hosts -iL \"{}\" -oL \"{}\"".format(
        ports, MASSCAN_RATE, target, output)
    os.system(cmd)
    logger.debug("masscan saved output:" + output)
    open_list = []

    with open(output, "r") as f:
        result_json = f.readlines()
    if result_json:
        try:
            del result_json[0]
            del result_json[-1]
            open_list = {}
            for res in result_json:
                try:
                    p = res.split()
                    ip = p[3]
                    port = p[2]
                    if ip not in open_list:
                        open_list[ip] = set()
                    open_list[ip].add(port)
                except:
                    pass

        except Exception as e:
            logger.error("masscan read faild")
    if open_list:
        return open_list
    return None
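The parsing above assumes masscan's -oL list output: a "#masscan" header line, a "# end" footer line, and one record per line in the form "state proto port ip timestamp". A minimal sketch of how such a line maps onto p[2]/p[3] (sample values invented):

sample = "open tcp 3306 115.159.39.215 1600000000"
fields = sample.split()
port, ip = fields[2], fields[3]
print(ip, port)  # -> 115.159.39.215 3306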
Example #2
def load_remote_poc():
    filename = os.path.join(PATHS.DATA_PATH, "api.json")
    api_lock = os.path.join(PATHS.DATA_PATH, "api.lock")
    # Refresh the API list every 10 days
    if not os.path.exists(api_lock):
        with open(api_lock, "w") as f:
            f.write(str(time.time()))

    with open(api_lock) as f:
        last_time = float(f.read())

    logger.debug("api last time:{}".format(last_time))
    if time.time() - last_time > 60 * 60 * 24 * 10:
        with open(api_lock, "w") as f:
            f.write(str(time.time()))
        logger.info("update airbug api...")
        _middle = "/master"
        _suffix = "/API.json"
        _prefix = WEB_REPOSITORY.replace("github.com",
                                         "raw.githubusercontent.com")
        _api = _prefix + _middle + _suffix
        r = requests.get(_api)
        datas = json.loads(r.text)
        for data in datas:
            data["webfile"] = _profix + _middle + data["filepath"]
        with open(filename, "w") as f:
            json.dump(datas, f)

    with open(filename) as f:
        datas = json.load(f)
    return datas
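Judging from how the entries are consumed here and in the hand_domain examples further down, each record in the downloaded API.json looks roughly like the sketch below; the field values are invented and the real schema may differ.

poc_entry = {
    "name": "phpinfo",               # matched against detected application keywords
    "type": "web",                   # used only for logging
    "time": "2019-01-01",            # used only for logging
    "filepath": "/pocs/phpinfo.py",  # path inside the repository
}
# load_remote_poc() then adds: poc_entry["webfile"] = _prefix + "/master" + poc_entry["filepath"]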
Example #3
def update_conf(path, section, option, value):
    logger.debug("Update tentacle config: [%s][%s] => %s" %
                 (section, option, value))
    cf = configparser.ConfigParser()
    # Read the existing config first so other sections and options are preserved.
    cf.read(path)
    if not cf.has_section(section):
        cf.add_section(section)
    cf.set(section, option, value)
    with open(path, 'w+') as configfile:
        cf.write(configfile)
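A hypothetical usage sketch (file name and values invented): with the read-before-set above, updating a single option leaves the rest of the file intact.

# Raise the request timeout in an existing tentacle config.
update_conf("tentacle.conf", "basic", "timeout", "10")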
Example #4
 def __del__(self):
     '''
     Destructor: commit pending changes and close the database connection.
     '''
     self.conn.commit()
     self.conn.close()
     logger.debug('destroy database object')
Example #5
async def go_request(req_list, source):
    async with ClientSession() as session:
        for req in req_list:
            url = req['url']
            method = req['method']
            headers = req['headers']
            logger.debug("Curling %s..." % (url))

            proxy = conf['config']['crawlergo']['http_proxy']
            username = conf['config']['crawlergo']['username']
            password = conf['config']['crawlergo']['password']

            if username.strip() != '' and password.strip() != '':
                proxy_auth = BasicAuth(username, password)
            else:
                proxy_auth = None
            try:
                logger.debug("Xray scan {}, from url {} ".format(url, source))
                async with session.request(method,
                                           url=url,
                                           headers=headers,
                                           proxy=proxy,
                                           proxy_auth=proxy_auth) as res:
                    pass
            except:
                pass
Example #6
def nmapscan(host, ports):
    # Consumes the results produced by the masscan step.
    # Safe to call from multiple threads.
    nm = nmap.PortScanner()
    argument = "-sV -sS -Pn --host-timeout 1m -p{}".format(','.join(ports))
    try:
        ret = nm.scan(host, arguments=argument)
    except nmap.PortScannerError:
        logger.debug("Nmap PortScannerError host:{}".format(host))
        return None
    except:
        return None

    # debug
    elapsed = ret["nmap"]["scanstats"]["elapsed"]
    command_line = ret["nmap"]["command_line"]
    logger.debug("[nmap] successed,elapsed:%s command_line:%s" %
                 (elapsed, command_line))

    if host in ret["scan"]:
        try:
            result = ret["scan"][host]["tcp"]
        except KeyError:
            return None
        return result

    return None
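For reference, the dictionary nmapscan() returns is python-nmap's per-host "tcp" table, keyed by integer port. The sketch below shows the rough shape; the concrete values are invented and the available keys depend on the nmap version and on what -sV detects.

example_result = {
    80: {"state": "open", "name": "http", "product": "nginx",
         "version": "1.18.0", "extrainfo": "", "cpe": ""},
    3306: {"state": "open", "name": "mysql", "product": "MySQL",
           "version": "5.7.30", "extrainfo": "", "cpe": ""},
}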
Example #7
    async def run(self):
        async with aiohttp.ClientSession() as session:

            flag = await self.check_engine_available(session, self.engine)
            if not flag:
                logger.error(
                    "{engine_name} is not available, skipping!".format(
                        engine_name=self.engine_name))
                return
            logger.debug("{engine_name} is available, starting!".format(
                engine_name=self.engine_name))

            data = {'inputurl': self.target}
            content = await self.get(session,
                                     self.base_url,
                                     method="POST",
                                     data=data,
                                     headers=self.headers,
                                     timeout=self.timeout,
                                     proxy=self.proxy)

            ret = self.check_response_errors(content)
            if not ret[0]:
                self.deal_with_errors(ret[1])

            self.extract(content)
            logger.sysinfo("{engine} Found {num} sites".format(
                engine=self.engine_name, num=len(self.results['subdomain'])))
            logger.debug(self.engine_name + " " +
                         str(len(self.results['subdomain'])))
Example #8
def init_conf(path):
    logger.debug("Init tentacle config...")
    configs = {
        "basic": {
            "thread_num": "100",
            "looptimer": str(12*60*60),
            "timeout": "5",
            "user_agent": '\n'.join([
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
                'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.0.16 (.NET CLR 3.5.30729)',
                'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7',
                'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; de-de) AppleWebKit/531.22.7 (KHTML, like Gecko) Version/4.0.5 Safari/531.22.7',
                'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.4 (KHTML, like Gecko) Chrome/6.0.481.0 Safari/534.4',
                'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.1b4) Gecko/20090423 Firefox/3.5b4 (.NET CLR 3.5.30729)',
                'Mozilla/5.0 (Windows; U; Windows NT 6.0; nb-NO) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5',
                'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.14) Gecko/2009082505 Red Hat/3.0.14-1.el5_4 Firefox/3.0.14',
                'Mozilla/5.0 (X11; U; Linux i686; tr-TR; rv:1.9.0.10) Gecko/2009042523 Ubuntu/9.04 (jaunty) Firefox/3.0.10',
                'Opera/9.80 (Macintosh; Intel Mac OS X; U; nl) Presto/2.6.30 Version/10.61',
            ])

        },
        "smtp": {
            "mail_host": "smtp.163.com",
            "mail_port": str(465),
            "mail_user": "******",
            "mail_pass": "******",
            "sender": "*****@*****.**",
            "receivers":"[email protected],[email protected]",
        },
        "proxy": {
            "proxy": False,
            "http_proxy": "http://127.0.0.1:1080",
            "https_proxy": "https://127.0.0.1:1080"
        },
        "google_api": {
            "developer_key": "developer_key",
            "search_enging": "search_enging"
        },
        # The API sections below are left for later
        # "zoomeye_api": {
        #     "username": "******",
        #     "password": "******"
        # },
        # "fofa_api": {
        #     "email": "*****@*****.**",
        #     "token": "*****@*****.**"
        # },
        # "shodan_api": {
        #     "token": "token@tentacle"
        # },
        # "github_api": {
        #     "token": "token@tentacle",
        # },
    }
    cf = configparser.ConfigParser()
    for section in configs.keys():
        cf[section] = configs[section]
    with open(path, 'w+') as configfile:
        cf.write(configfile)
    sys.exit(logger.error("Please set the tentacle config in submon.conf..."))
Example #9
    def extract(self, content):
        pattern = re.compile(
            '<a href="javascript:" onclick="window.open.*?" target="_blank">(.*?{domain})</a>'
            .format(domain=self.target))
        next_page = "下一页"  # "next page" link text in the engine's result HTML
        try:
            links = pattern.findall(content)

            for link in links:
                if not link.startswith('http://') and not link.startswith(
                        'https://'):
                    link = "http://" + link

                subdomain = parse.urlparse(link).netloc

                if subdomain != self.target and subdomain.endswith(
                        self.target):
                    if subdomain not in self.results['subdomain']:
                        logger.debug("{engine} Found {subdomain}".format(
                            engine=self.engine_name, subdomain=subdomain))
                        self.results['subdomain'].append(subdomain)
        except Exception:
            pass
        if next_page in content:
            # tell the engine there is still a next page
            return True
        else:
            return False
Example #10
    def hand_ip(self, serviceTypes, option='masscan'):
        ip_list = []

        for item in serviceTypes:
            ip_list.append(item["target"])
        ports = MASSCAN_DEFAULT_PORT
        result2 = {}
        if option == 'masscan':
            if MASSCAN_FULL_SCAN:
                ports = "1-65535"
            target = os.path.join(PATHS.OUTPUT_PATH,
                                  "target_{0}.log".format(time.time()))
            with open(target, "w+") as fp:
                fp.write('\n'.join(ip_list))
            logger.debug("ip:" + repr(ip_list))
            try:
                result = masscan(target, ports)
            except Exception as e:
                logger.error("masscan error msg:{}".format(repr(e)))
                result = None
            if result is None:
                return None
            # format:{'115.159.39.75': ['80'], '115.159.39.215': ['80', '3306'],}
            for host, ports in result.items():
                ports = list(ports)
                if host not in result2:
                    result2[host] = []
                task_update("running", 1)
                try:
                    result_nmap = nmapscan(host, ports)
                except:
                    result_nmap = None
                task_update("running", -1)
                if result_nmap is None:
                    for tmp_port in ports:
                        result2[host].append({"port": tmp_port})
                    continue
                tmp_r = self.nmap_result_handle(result_nmap, host=host)
                result2.update(tmp_r)
        elif option == "nmap":
            logger.debug("ip:" + repr(ip_list))
            for host in ip_list:
                result_nmap = nmapscan(host, ports.split(","))
                tmp_r = self.nmap_result_handle(result_nmap, host=host)
                if tmp_r:
                    result2.update(tmp_r)

        data = {}
        for ip in result2.keys():
            # result2[ip]
            if ip not in data:
                data[ip] = {}
            d = ip_location.poc(ip)
            if d:
                data[ip]["location"] = d
            data[ip]["infos"] = result2[ip]

        collector.add_ips(data)
        for ip in result2.keys():
            collector.send_ok_ip(ip)
Example #11
    async def run(self):
        async with aiohttp.ClientSession() as session:

            flag = await self.check_engine_available(session, self.engine)
            if not flag:
                logger.error(
                    "{engine_name} is not available, skipping!".format(
                        engine_name=self.engine_name))
                return
            logger.debug("{engine_name} is available, starting!".format(
                engine_name=self.engine_name))

            data = {'inputurl': self.target}
            async with session.post(self.base_url, proxy=self.proxy,
                                    data=data) as res:
                if res != None:
                    try:
                        content = await res.text()
                    except:
                        content = ""

                    ret = self.check_response_errors(content)
                    if not ret[0]:
                        self.deal_with_errors(ret[1])

                    self.extract(content)

            logger.sysinfo("{engine} Found {num} sites".format(
                engine=self.engine_name, num=len(self.results['subdomain'])))
            logger.debug(self.engine_name + " " +
                         str(len(self.results['subdomain'])))
Example #12
def load_conf(path):
    logger.debug("Load tentacle config...")
    cf = configparser.ConfigParser()
    cf.read(path)
    sections = cf.sections()
    configs = {}
    for section in sections:
        logger.debug("Load config: %s" % (section))
        config = {}
        for option in cf.options(section):
            config[option] = cf.get(section,option)
        configs[section] = config
    conf['config'] = configs
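Worth noting: configparser hands every option back as a string, so numeric settings still need an explicit cast at the call site. A self-contained sketch of the nested dict load_conf() builds (section and option names borrowed from init_conf above):

import configparser

cf = configparser.ConfigParser()
cf.read_string("[basic]\nthread_num = 100\ntimeout = 5\n")
configs = {s: {o: cf.get(s, o) for o in cf.options(s)} for s in cf.sections()}
assert configs["basic"]["timeout"] == "5"   # stored as a string
timeout = int(configs["basic"]["timeout"])  # cast where a number is needed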
Example #13
    def recursion_deep(self):
        '''
        Crawl recursively according to the depth value.
        operate['db'].deep -- current depth
        self.deep          -- target crawl depth
        :return:
        '''
        if operate['db'].deep == 0:
            logger.info("spidering deep == 0 page")
            r = self.get_html(self.url)
            try:
                html = r['html']
            except:
                print "url input error!"
                logger.error("url error(%s)" % (self.url))
                return

            operate['db'].insert(html, self.url)
            self.r_group.append(r)
            operate['db'].deep += 1
            self.recursion_deep()
        elif operate['db'].deep > self.deep:
            logger.info('spider deep over!')
            return
        else:
            logger.info("spidering deep = %s" % operate['db'].deep)
            tmp = []
            url_group = []

            # Extract URLs from the pages crawled at the previous depth
            for x in self.r_group:
                html = x['html']
                url_group.extend(self.find_url(html))
                logger.debug("from %s page find %s url" %
                             (x['url'], len(url_group)))

            # Stop when no URLs were extracted from the pages
            if url_group == []:
                return
            # Feed the extracted URLs into the thread pool
            result_list = self._thread.my_map(url_group)
            for y in xrange(len(result_list)):
                if result_list[y]['type'] == 'html':
                    tmp.append(result_list[y])
                else:
                    logger.debug("delete the not html page (%s)" %
                                 url_group[y])

            self.r_group = tmp
            operate['db'].deep += 1
            self.recursion_deep()
Example #14
 def find_url(self, html):
     '''
     Use BeautifulSoup to extract URLs from a page.
     :param html: the HTML page
     :return: a list of the URLs found in the page
     PS: only the href attribute of <a> tags is considered for now.
     '''
     url_group = []
     logger.debug("start find url in a html")
     try:
         bs = BeautifulSoup(html, 'lxml')
     except Exception, e:
         logger.error("bs4(html) fail!\nthe error info is : " + str(e))
         return
Example #15
    def recursion_deep(self):
        '''
        Crawl recursively according to the depth value.
        operate['db'].deep -- current depth
        self.deep          -- target crawl depth
        :return:
        '''
        if operate['db'].deep == 0:
            logger.info("spidering deep == 0 page")
            r = self.get_html(self.url)
            try:
                html = r['html']
            except:
                print "url input error!"
                logger.error("url error(%s)" %(self.url))
                return

            operate['db'].insert(html, self.url)
            self.r_group.append(r)
            operate['db'].deep += 1
            self.recursion_deep()
        elif operate['db'].deep > self.deep:
            logger.info('spider deep over!')
            return
        else:
            logger.info("spidering deep = %s" %operate['db'].deep)
            tmp = []
            url_group = []

            # Extract URLs from the pages crawled at the previous depth
            for x in self.r_group:
                html = x['html']
                url_group.extend(self.find_url(html))
                logger.debug("from %s page find %s url" %(x['url'], len(url_group)))

            # Stop when no URLs were extracted from the pages
            if url_group == []:
                return
            # Feed the extracted URLs into the thread pool
            result_list = self._thread.my_map(url_group)
            for y in xrange(len(result_list)):
                if result_list[y]['type'] == 'html':
                    tmp.append(result_list[y])
                else:
                    logger.debug("delete the not html page (%s)" % url_group[y])

            self.r_group = tmp
            operate['db'].deep += 1
            self.recursion_deep()
Example #16
    def insert(self, html, url):
        '''
        Required parameters:
        :param html: content of the crawled page
        :param url: URL of the crawled page
        :param deep: depth of the page (taken from self.deep)
        :return:
        '''
        in_sql = "INSERT INTO spider VALUES (null, ?, ?, ?, ?)"
        # When a keyword is configured but does not appear in the page
        if conf['key'] and conf['key'] not in html:
            logger.debug("the address: " + url + " does not contain the key: " + conf['key'])
            return None

        keyword = (conf['key'] if conf['key'] else '')
        self.cur.execute(in_sql, (html, url, self.deep, keyword))
        logger.debug("INSERT suc and id is: " + str(self.cur.lastrowid))
        self.conn.commit()
Example #17
    def insert(self, html, url):
        '''
        Required parameters:
        :param html: content of the crawled page
        :param url: URL of the crawled page
        :param deep: depth of the page (taken from self.deep)
        :return:
        '''
        in_sql = "INSERT INTO spider VALUES (null, ?, ?, ?, ?)"
        # When a keyword is configured but does not appear in the page
        if conf['key'] and conf['key'] not in html:
            logger.debug("the address: " + url + " does not contain the key: " +
                         conf['key'])
            return None

        keyword = (conf['key'] if conf['key'] else '')
        self.cur.execute(in_sql, (html, url, self.deep, keyword))
        logger.debug("INSERT suc and id is: " + str(self.cur.lastrowid))
        self.conn.commit()
Example #18
async def get_title(req_list):
    ret = []
    async with ClientSession() as session:
        for subdomain in req_list:
            try:
                logger.debug("Curling %s..." % (subdomain))
                flag = False
                for pro in ['http://', "https://"]:
                    url = pro + subdomain + '/'
                    async with session.get(url=url) as response:
                        if response != None:
                            try:
                                res = await response.read()
                            except:
                                res = ""
                            status = response.status
                            try:
                                res = str(res, 'utf-8')
                            except UnicodeDecodeError:
                                res = str(res, 'gbk')
                            except:
                                res = "网页编码错误"

                            m = re.search('<title>(.*)<\/title>', res.lower())
                            if m != None and m.group(1):
                                title = m.group(1)
                            else:
                                title = '网页没有标题'

                            try:
                                length = int(
                                    response.headers['content-length'])
                            except:
                                length = len(str(response.headers)) + len(res)

                            ret.append([subdomain, url, title, status, length])
                            flag = True
                            break
                if not flag:
                    ret.append([subdomain, "", "", 0, 0])
            except Exception as e:
                logger.error(str(e))
    return ret
Example #19
    def execute(self, request: Request, response: Response):
        self.target = ''
        self.requests = request
        self.response = response
        output = None
        try:
            output = self.audit()
        except NotImplementedError:
            logger.error('Plugin: {0} does not define "{1}" mode'.format(
                self.name, 'audit'))

        except ConnectTimeout:
            retry = RETRY
            while retry > 0:
                logger.debug('Plugin: {0} timeout, start it over.'.format(
                    self.name))
                try:
                    output = self.audit()
                    break
                except ConnectTimeout:
                    logger.debug('POC: {0} time-out retry failed!'.format(
                        self.name))
                retry -= 1
            else:
                msg = "connect target '{0}' failed!".format(self.target)
                logger.error(msg)

        except HTTPError as e:
            logger.warning(
                'Plugin: {0} HTTPError occurs, start it over.'.format(
                    self.name))

        except ConnectionError as e:
            msg = "connect target '{0}' failed!".format(self.target)
            logger.error(msg)

        except TooManyRedirects as e:
            logger.error(str(e))

        except Exception as e:
            logger.error(str(e))

        return output
Example #20
 def __init__(self, dbfile):
     '''
     Initialization.
     :param dbfile: path of the sqlite3 database file
     '''
     logger.debug('init database')
     self.deep = 0
     dbfile = "db/" + dbfile
     self.conn = sqlite3.connect(dbfile, check_same_thread=False)
     self.cur = self.conn.cursor()
     check_table_sql = "select count(*) from sqlite_master where type='table' and name='spider'"
     self.cur.execute(check_table_sql)
     if self.cur.fetchone()[0] == 0:
         logger.debug('create a spider table')
         self.creab_table()
     else:
         logger.debug('spider table already exist')
Example #21
def oparser():
    '''
        Set up and initialise the command-line options.
    '''
    parser = OptionParser()
    parser.version = "B0.8"

    parser.add_option("--version", "-v", dest="showVersion",
                      action="store_true",
                      help="Show program's version number and exit")
    # Required arguments
    target = OptionGroup(parser, "Target", "At least one of these "
                                           "options has to be provided to define the target(s)")

    target.add_option("-u", dest="url", help="Target URL")
    target.add_option("-d", dest="deep", type="int", help="spider the depth")
    target.add_option("--testself", dest="test", action="store_true", help="auto test")

    # Optional arguments
    opt = OptionGroup(parser, "Options", "Optional parameters")
    opt.add_option("-f", dest="logfile", help="The custom log file path")
    opt.add_option("--key", dest="key", help="Page keywords")
    opt.add_option("-l", dest="loglevel", type="int", help="log level(1-5) "
                                                        "1, CRITICAL; "
                                                        "2, ERROR(default); "
                                                        "3, WARN; "
                                                        "4, INFO; "
                                                        "5, DEBUG;")
    opt.add_option("--thread", dest="thread", type="int", help="thread number(default 10)")
    opt.add_option("--dbfile", dest="dbfile", help="set sqlite database file")

    parser.add_option_group(target)
    parser.add_option_group(opt)

    (args, _) = parser.parse_args(sys.argv)

    if args.showVersion:
        print parser.version
        print "-- By Hcamael"
        exit(0)

    if not (args.url or args.test):
        errMsg = "missing a mandatory option (-u) or (--testself), "
        errMsg += "use -h for basic or -hh for advanced help"
        parser.error(errMsg)

    # Initialise configuration from the parsed arguments
    conf['url'] = (args.url if args.url else "http://sina.com.cn")
    name = re.findall("[\w\.-]+", conf['url'])
    try:
        conf['name'] = (name[1] if len(name) == 2 else name[0])
    except IndexError:
        errMsg = "url input error!"
        logger.error("url matching fail!")
        parser.error(errMsg)

    conf['deep'] = (args.deep if args.deep else 2)
    if conf['deep'] > 50:
        # Upper limit is 50
        errMsg = "The deep is too large(0 <= deep <= 50)"
        parser.error(errMsg)

    if conf['deep'] < 0:
        # Values below 0 are not allowed
        errMsg = "The deep input error(0 < deep <= 50)"
        parser.error(errMsg)

    conf['test'] = args.test
    conf['key'] = args.key
    conf['loglevel'] = (args.loglevel if args.loglevel else 2)
    if conf['loglevel'] < 1 or conf['loglevel'] > 5:
        # loglevel: 1-5
        errMsg = "loglevel value error(input 1-5)"
        parser.error(errMsg)

    if conf['loglevel'] == 1:
        loglevel = logging.CRITICAL
    elif conf['loglevel'] == 2:
        loglevel = logging.ERROR
    elif conf['loglevel'] == 3:
        loglevel = logging.WARN
    elif conf['loglevel'] == 4:
        loglevel = logging.INFO
    elif conf['loglevel'] == 5:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.ERROR

    conf['logfile'] = (os.path.basename(args.logfile) if args.logfile \
                                                    else 'spider.log' if args.test \
                                                                        else conf['name']+".log")

    f = open("log/" + conf['logfile'], 'a+')
    Log_Handle = logging.StreamHandler(f)
    # Log_Handle = logging.StreamHandler(sys.stdout)
    FORMATTER = logging.Formatter("\r[%(asctime)s] [%(levelname)s] [%(thread)d] %(message)s", "%H:%M:%S")
    Log_Handle.setFormatter(FORMATTER)
    logger.addHandler(Log_Handle)
    logger.setLevel(loglevel)

    conf['dbfile'] = (os.path.basename(args.dbfile) if args.dbfile else conf['name']+".db")
    conf['thread'] = (args.thread if args.thread else 10)
    if conf['thread'] < 0 or conf['thread'] > 50:
        # thread == 0 means multithreading is disabled
        # maximum is 50
        errMsg = "thread value error (0-50, 0 means not use thread)"
        parser.error(errMsg)

    logger.debug('parsing command line succeeded')
Example #22
    def hand_domain(self, serviceType):
        target = serviceType["target"]
        logger.info(target)
        # Record this target
        collector.add_domain(target)
        # Issue the request
        try:
            r = requests.get(target,
                             timeout=30,
                             verify=False,
                             allow_redirects=False)
            collector.add_domain_info(target, {
                "headers": r.headers,
                "body": r.text,
                "status_code": r.status_code
            })
        except Exception as e:
            logger.error("request url error:" + str(e))
            collector.del_domain(target)
            return
        logger.debug("target:{} over,start to scan".format(target))

        # Get hostname
        hostname = urlparse(target).netloc.split(":")[0]
        if not is_ip_address_format(hostname):
            try:
                # return the host from socket
                _ip = socket.gethostbyname(hostname)
                collector.add_domain_info(target, {"ip": _ip})
            except:
                pass
        else:
            collector.add_domain_info(target, {"ip": hostname})
        # PoCs to run for gathering information about the target
        work_list = [webeye.poc, webtitle.poc, wappalyzer.poc]
        # password_found.poc

        if IS_START_PLUGINS:
            pass
            work_list.append(crossdomain.poc)
            # work_list.append(directory_browse.poc)
            work_list.append(gitleak.poc)
            work_list.append(iis_parse.poc)
            work_list.append(phpinfo.poc)
            work_list.append(svnleak.poc)
            work_list.append(tomcat_leak.poc)
            # work_list.append(whatcms.poc)

        # Each poc stores its findings directly through the collector

        for func in work_list:
            try:
                func(target)
            except Exception as e:
                logger.error("domain plugin threading error {}:{}".format(
                    repr(Exception), str(e)))
                pass
        logger.debug("target:{} End of scan".format(target))
        collector.print_domains()
        infos = collector.get_domain(target)
        _pocs = []
        temp = {}
        if IS_START_PLUGINS and "CMS" in infos:
            if infos.get("app"):
                temp["app"] = []
                temp["app"].append(infos["CMS"])
            else:
                temp["app"] = [infos["CMS"]]
            # update domain app
            collector.add_domain_info(target, temp)

        if temp.get("app"):
            keywords = temp["app"]
            # Fetch the remote plugin list
            pocs = load_remote_poc()

            for poc in pocs:
                for keyword in keywords:
                    webfile = poc["webfile"]
                    logger.debug("load {0} poc:{1} poc_time:{2}".format(
                        poc["type"], webfile, poc["time"]))

                    # Load the plugin: fetch the remote file and convert it into a module object

                    code = requests.get(webfile).text
                    obj = load_string_to_moudle(code, webfile)
                    # Append the remote module to the module object list
                    _pocs.append(obj)
        # Run the plugins concurrently
        if _pocs:
            executor = futures.ThreadPoolExecutor(len(_pocs))
            fs = []
            for f in _pocs:
                task = executor.submit(f.poc, target)
                # submit() returns a Future handle for the scheduled call
                fs.append(task)
            for f in futures.as_completed(fs):
                try:
                    res = f.result()
                except Exception as e:
                    res = None
                    logger.error("load poc error:{} error:{}".format(
                        target, str(e)))
                if res:
                    name = res.get("name") or "scan_" + str(time.time())
                    collector.add_domain_bug(target, {name: res})
        # Collect results from the asynchronously executed plugins and report them via the collector
        collector.send_ok(target)
        print("print collector")
        print(collector.collect_domains)
Example #23
 def _work(self):
     while True:
         self.load_lock.acquire()
         if len(self.targets) > 0 and self.is_continue:
             subdomain = self.targets.popleft()
             self.load_lock.release()
             try:
                 logger.debug("Curling %s..." %(subdomain))
                 flag = False
                 codes = ['utf-8', 'gbk']
                 for pro in ['http://', "https://"]:
                     url = pro + subdomain + '/'
                     headers = {
                         'Connection': 'keep-alive',
                         'Pragma': 'no-cache',
                         'Cache-Control': 'no-cache',
                         'Upgrade-Insecure-Requests': '1',
                         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
                         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                         'DNT': '1',
                         'Accept-Encoding': 'gzip, deflate',
                         'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'
                     }
                     res = self._curl(url,headers = headers)
                     if res != None:
                         try:
                             length = int(res.headers['content-length'])
                         except:
                             length = len(str(res.headers)) + len(res.text)
                         soup = BeautifulSoup(res.text, "html5lib")
                         if soup !=None:
                             title = soup.title
                             if title == None or title.string == None or title.string == '':
                                 title = "网页没有标题".encode('utf-8')
                             else:
                                 if res.encoding!= None:
                                     title = title.string.encode(res.encoding)
                                     codes.append(res.encoding)
                                 else:
                                     title = title.string
                             codes.append(self.type_code)
                             for j in range(0, len(codes)):
                                 try:
                                     title = title.decode(codes[j]).strip().replace("\r", "").replace("\n", "")
                                     break
                                 except:
                                     continue
                                 finally:
                                     if j + 1 == len(codes):
                                         title = '网页标题编码错误'
                             self.ret.append([subdomain, url, title, res.status_code, length])
                             flag = True
                             break
                         else:
                             title = '网页没有标题'
                             self.ret.append(
                                 [subdomain, url, title, res.status_code,length])
                 if not flag:
                     self.ret.append([subdomain, "", "", 0, 0])
             except Exception:
                 self.errmsg = traceback.format_exc()
                 self.is_continue = False
                 logger.error(self.errmsg)
         else:
             self.load_lock.release()
             break
Example #24
class SpiderControl:
    '''
    Spider controller class.
    '''
    def __init__(self):
        '''
        Initialization.
        self.url     -- root URL
        self.deep    -- crawl depth
        self.db      -- database access object
        self._thread -- thread pool
        '''
        logger.info('init control class')
        self.url = conf['url']
        self.deep = conf['deep']
        self.db = operate['db']
        self._thread = ThreadPool(conf['thread'], self.get_html)

    def run(self):
        '''
        Main control method.
        :return: None
        '''
        logger.info("start spider, and the spider deep is " + str(self.deep))
        self.url_group = []
        self.r_group = []
        self.recursion_deep()
        logger.info("The spider page total number is : " +
                    str(len(self.url_group)))
        self._thread._del()
        logger.info("Spider OVER!!")

    def recursion_deep(self):
        '''
        Crawl recursively according to the depth value.
        operate['db'].deep -- current depth
        self.deep          -- target crawl depth
        :return:
        '''
        if operate['db'].deep == 0:
            logger.info("spidering deep == 0 page")
            r = self.get_html(self.url)
            try:
                html = r['html']
            except:
                print "url input error!"
                logger.error("url error(%s)" % (self.url))
                return

            operate['db'].insert(html, self.url)
            self.r_group.append(r)
            operate['db'].deep += 1
            self.recursion_deep()
        elif operate['db'].deep > self.deep:
            logger.info('spider deep over!')
            return
        else:
            logger.info("spidering deep = %s" % operate['db'].deep)
            tmp = []
            url_group = []

            # Extract URLs from the pages crawled at the previous depth
            for x in self.r_group:
                html = x['html']
                url_group.extend(self.find_url(html))
                logger.debug("from %s page find %s url" %
                             (x['url'], len(url_group)))

            # Stop when no URLs were extracted from the pages
            if url_group == []:
                return
            # Feed the extracted URLs into the thread pool
            result_list = self._thread.my_map(url_group)
            for y in xrange(len(result_list)):
                if result_list[y]['type'] == 'html':
                    tmp.append(result_list[y])
                else:
                    logger.debug("delete the not html page (%s)" %
                                 url_group[y])

            self.r_group = tmp
            operate['db'].deep += 1
            self.recursion_deep()

    def find_url(self, html):
        '''
        Use BeautifulSoup to extract URLs from a page.
        :param html: the HTML page
        :return: a list of the URLs found in the page
        PS: only the href attribute of <a> tags is considered for now.
        '''
        url_group = []
        logger.debug("start find url in a html")
        try:
            bs = BeautifulSoup(html, 'lxml')
        except Exception, e:
            logger.error("bs4(html) fail!\nthe error info is : " + str(e))
            return

        comp = re.compile("^https?://[/\w\.-]*/?[\w&\+%=-]*")

        for x in bs.findAll('a'):
            try:
                if comp.match(x['href']):
                    logger.debug("%s match suc" % x['href'])
                    if x['href'] not in self.url_group:
                        url_group.append(x['href'])
            except KeyError:
                logger.debug(str(x) + " | <match href fail>")
                continue

        logger.debug("find %s url" % (len(url_group)))
        self.url_group.extend(url_group)
        return url_group
Example #25
            "User-Agent":
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
        }
        result = {"type": None}
        logger.info("request a url: %s" % url)
        try:
            req = requests.get(url, headers=header, timeout=4)
        except Exception, e:
            try:
                logger.error("%s @@ requests fail and the info is %s" %
                             (url.encode('utf-8'), e))
            except:
                print url
                print isinstance(url, unicode)
            return result

        if 'text/html' in req.headers['Content-Type']:
            logger.debug("get a html page: " + url)
            result['type'] = 'html'
            result['html'] = req.text
            result['url'] = url
        elif 'text/javascript' in req.headers['Content-Type']:
            logger.debug("get a js page: " + url)
            result['type'] = 'js'
            result['html'] = req.text
            result['url'] = url
        else:
            logger.warn("the page is not a html or a js(" + url + ")")

        return result
Example #26
        header = {
            "User-Agent":
                "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
        }
        result = {"type": None}
        logger.info("request a url: %s" %url)
        try:
            req = requests.get(url, headers=header, timeout=4)
        except Exception, e:
            try:
                logger.error("%s @@ requests fail and the info is %s" %(url.encode('utf-8'), e))
            except:
                print url
                print isinstance(url, unicode)
            return result

        if 'text/html' in req.headers['Content-Type']:
            logger.debug("get a html page: " + url)
            result['type'] = 'html'
            result['html'] = req.text
            result['url'] = url
        elif 'text/javascript' in req.headers['Content-Type']:
            logger.debug("get a js page: " + url)
            result['type'] = 'js'
            result['html'] = req.text
            result['url'] = url
        else:
            logger.warn("the page is not a html or a js("+url+")")

        return result
Example #27
 def replace_subdomain_status(self, subdomain, url, title, status, len, update_time, mon_domain):
     logger.debug("Replace subdomain: %s" % (subdomain))
     self.execute(
         "REPLACE INTO subdomain (subdomain, url, title, status, len, update_time, mon_domain) "
         "VALUES (?, ?, ?, ?, ?, ?, ?)",
         (subdomain, url, title, status, len, update_time, mon_domain))
Example #28
 def update_subdomain_status(self, subdomain, url, title, status, len, update_time):
     logger.debug("Update subdomain: %s" % (subdomain))
     self.execute(
         "UPDATE subdomain SET url = ?, title = ?, status = ?, len = ?, update_time = ? "
         "WHERE subdomain = ? AND (title != ? OR status != ?)",
         (url, title, status, len, update_time, subdomain, title, status))
Example #29
 def insert_subdomain(self, subdomain, url, title, status, len, update_time, mon_domain):
     logger.debug("Insert subdomain: %s" % (subdomain))
     self.execute(
         "INSERT OR IGNORE INTO subdomain (subdomain, url, title, status, len, update_time, mon_domain) "
         "VALUES (?, ?, ?, ?, ?, ?, ?)",
         (subdomain, url, title, status, len, update_time, mon_domain))
Example #30
    def hand_domain(self, serviceType):
        target = serviceType["target"]
        logger.info(target)
        # Record this target
        collector.add_domain(target)
        # Issue the request
        try:
            r = requests.get(target,
                             timeout=30,
                             verify=False,
                             allow_redirects=False)
            collector.add_domain_info(target, {
                "headers": r.headers,
                "body": r.text,
                "status_code": r.status_code
            })
        except Exception as e:
            logger.error("request url error:" + str(e))
            collector.del_domain(target)
            return
        logger.debug("target:{} over,start to scan".format(target))

        # Get hostname
        hostname = urlparse(target).netloc.split(":")[0]
        if not is_ip_address_format(hostname):
            try:
                _ip = socket.gethostbyname(hostname)
                collector.add_domain_info(target, {"ip": _ip})
            except:
                pass
        else:
            collector.add_domain_info(target, {"ip": hostname})

        work_list = [
            webeye.poc, webtitle.poc, wappalyzer.poc, password_found.poc
        ]

        if IS_START_PLUGINS:
            work_list.append(crossdomain.poc)
            work_list.append(directory_browse.poc)
            work_list.append(gitleak.poc)
            work_list.append(iis_parse.poc)
            work_list.append(phpinfo.poc)
            work_list.append(svnleak.poc)
            work_list.append(tomcat_leak.poc)
            work_list.append(whatcms.poc)

        # WorkList.append(bakfile.poc)  # backup-file scanning removed: too time-consuming

        # th = []
        # try:
        #     for func in work_list:
        #         i = threading.Thread(target=func, args=(target,))
        #         i.start()
        #         th.append(i)
        #     for thi in th:
        #         thi.join()
        # except Exception as e:
        #     logger.error("domain plugin threading error {}:{}".format(repr(Exception), str(e)))
        for func in work_list:
            try:
                func(target)
            except Exception as e:
                logger.error("domain plugin threading error {}:{}".format(
                    repr(Exception), str(e)))

        logger.debug("target:{} End of scan".format(target))
        infos = collector.get_domain(target)
        _pocs = []
        temp = {}
        if IS_START_PLUGINS and "CMS" in infos:
            if infos.get("app"):
                temp["app"] = []
                temp["app"].append(infos["CMS"])
            else:
                temp["app"] = [infos["CMS"]]
            # update domain app
            collector.add_domain_info(target, temp)

        if temp.get("app"):
            keywords = temp["app"]
            # Fetch the remote plugin list
            pocs = load_remote_poc()

            for poc in pocs:
                for keyword in keywords:
                    if poc["name"] == keyword:
                        webfile = poc["webfile"]
                        logger.debug("load {0} poc:{1} poc_time:{2}".format(
                            poc["type"], webfile, poc["time"]))
                        # Load the plugin
                        code = requests.get(webfile).text
                        obj = load_string_to_module(code, webfile)
                        _pocs.append(obj)

        # Run the plugins concurrently
        if _pocs:
            executor = futures.ThreadPoolExecutor(len(_pocs))
            fs = []
            for f in _pocs:
                task = executor.submit(f.poc, target)
                fs.append(task)
            for f in futures.as_completed(fs):
                try:
                    res = f.result()
                except Exception as e:
                    res = None
                    logger.error("load poc error:{} error:{}".format(
                        target, str(e)))
                if res:
                    name = res.get("name") or "scan_" + str(time.time())
                    collector.add_domain_bug(target, {name: res})

        collector.send_ok(target)
Example #31
#!/usr/bin/env python
# -*- coding:utf-8 -*-

from lib.data import conf
from lib import control
from lib.data import logger
from lib.data import operate
from lib.options import oparser
from lib.database import SpiderDb

__author__ = "Hcamael"

if __name__ == '__main__':
    logger.debug("Begin Spider")
    oparser()
    operate['db'] = SpiderDb(conf['dbfile'])
    c = control.SpiderControl()
    c.run()