Example #1
def Sub_Brute(Sub_Domains):
    while 1:
        for domain in Sub_Domains:
            res = []
            Br = Brute(domain)
            res = Br.start()
            res = list(set(res))
            if res != []:
                with ThreadPoolExecutor(max_workers=pool_count) as pool4:
                    result = pool4.map(Add_Data_To_Url, res)
            # Take a break after brute-forcing each subdomain
            time.sleep(60)
        time.sleep(3600 * 24)
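
Examples #1, #2, #3 and #5 all rely on the same helpers that are not shown on this page: a Brute class whose start() returns a list of discovered subdomain URLs, an Add_Data_To_Url function that stores one URL, and a module-level pool_count worker count. The snippet below is only a minimal, self-contained sketch with hypothetical stub bodies so the pattern can be run in isolation; the real project defines these names elsewhere.

# Minimal sketch with hypothetical stubs for the helpers used above.
import time
from concurrent.futures import ThreadPoolExecutor

pool_count = 10  # assumed worker count


class Brute:
    """Stub: the real class brute-forces subdomains of `domain`."""
    def __init__(self, domain):
        self.domain = domain

    def start(self):
        # The real implementation resolves candidate subdomains; this stub
        # just returns a fixed sample list.
        return ['http://a.' + self.domain, 'http://b.' + self.domain]


def Add_Data_To_Url(url):
    """Stub: the real function writes one URL into the database."""
    print('saving', url)


# Usage in the same shape as Examples #1-#3:
for domain in ['example.com']:
    res = list(set(Brute(domain).start()))
    if res:
        with ThreadPoolExecutor(max_workers=pool_count) as pool:
            list(pool.map(Add_Data_To_Url, res))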
Example #2
def Sub_Brute(Domains):
    for domain in Domains:
        res = Brute(domain).start()
        print(res)
        if res:
            with ProcessPoolExecutor() as pool:
                result = pool.map(Add_Data_To_Url, res)
Example #3
def Sub_Brute(Sub_Domains):
    for domain in Sub_Domains:
        res = []
        res = Brute(domain).start()
        res = list(set(res))
        if res != []:
            with ProcessPoolExecutor(max_workers=pool_count) as pool:
                result = pool.map(Add_Data_To_Url, res)
        # Take a break after brute-forcing each subdomain
        time.sleep(360)
Example #4
def Run_Crawl(Domains):
    Domains = ['.' + str(x) for x in Domains]
    # Pause for 9 random intervals of 10-20 seconds before picking a URL to crawl
    for _ in range(9):
        time.sleep(random.randint(10, 20))
    '''
    2019-12-23
    A per-domain "monitored" flag was added, so the logic here would need to change:
    1. If a URL has been scanned before,
    2. the URL index table already contains it.
    3. If monitoring for that domain is later switched off,
    4. the fixed query above still returns the same records,
    5. so an extra check would be needed.
    6. Later the domain may be switched back to monitored,
    7. which leads to this flip-twice logic.
    Conclusion: a subdomain of B may be found by crawling site A even when it cannot
    be found by crawling site B itself, so crawling a few extra times does little harm.
    Skipping the crawl is not recommended, so nothing is changed here.
    for subd in ALL_DOMAINS:
        if subd in url:
            ins = True
            target_url.get = '是'
            # Set the flag in advance so the next process does not reuse the same record
            target_url.save()
    if ins == False:
        target_url.get = '空'
        target_url.save()
        return
    '''
    try:
        target_url = URL.objects.filter(get='否')[0]
        url = target_url.url
        target_url.get = '是'
        target_url.save()
        # The flag is set in advance so the next worker process does not pick up the same record (see the sketch after this example)
    except Exception as e:
        Except_Log(stat=31, url='|Failed to fetch URL and set scan status|', error='Failed to fetch a URL to pre-crawl')
        # On failure (no rows left in the database), wait and retry once
        time.sleep(600)
        ResetCrawl(db=Dbname)
        return

    try:
        All_Urls = Crawl(url)
        if All_Urls != []:
            All_Urls = set(All_Urls)
            Other_Domains = []
            if list(All_Urls) != [] and All_Urls != None:
                try:
                    Sub_Domains1 = set(
                        [y for x in Domains for y in All_Urls if x in y])
                    if list(Sub_Domains1) != []:
                        with ThreadPoolExecutor(
                                max_workers=pool_count) as pool1:
                            result = pool1.map(Add_Data_To_Url,
                                               list(Sub_Domains1))
                    Other_Domains = list(All_Urls - Sub_Domains1)
                except Exception as e:
                    Except_Log(stat=11, url='|Failed to fetch URL|', error=str(e))

                if Other_Domains != [] and Other_Domains != None:
                    try:
                        for urle in Other_Domains:
                            if '.gov.cn' not in urle and '.edu.cn' not in urle:
                                try:
                                    try:
                                        Test_Other_Url = list(
                                            Other_Url.objects.filter(url=urle))
                                    except:
                                        close_old_connections()
                                        Test_Other_Url = list(
                                            Other_Url.objects.filter(url=urle))
                                    if Test_Other_Url == []:
                                        ip = get_host(urle)
                                        res = Get_Url_Info(urle).get_info()
                                        res_url = res.get('url')
                                        try:
                                            res_title = pymysql.escape_string(
                                                res.get('title'))
                                        except:
                                            res_title = 'Error'
                                        res_power = res.get('power')
                                        res_server = res.get('server')
                                        status = res.get('status')
                                        res_ip = ip
                                        #if int(status) in Alive_Status:
                                        try:
                                            Other_Url.objects.create(
                                                url=res_url,
                                                title=res_title,
                                                power=res_power,
                                                server=res_server,
                                                status=status,
                                                ip=res_ip)
                                        except Exception as e:
                                            Except_Log(stat=33,
                                                       url=url + '|Asset crawl error|',
                                                       error=str(e))
                                            close_old_connections()
                                            Other_Url.objects.create(
                                                url=res_url,
                                                title='Error',
                                                power=res_power,
                                                server=res_server,
                                                status=status,
                                                ip=res_ip)
                                except Exception as e:
                                    Except_Log(stat=37,
                                               url=url + '|Asset crawl error|',
                                               error=str(e))
                    except Exception as e:
                        Except_Log(stat=36, url=url + '|Asset crawl error|', error=str(e))
        try:
            '''
            2019-12-23
            Although the crawling logic above is left unchanged,
            there is no need here to brute-force child subdomains
            of domains that are not on the monitoring list.
            '''
            for sub in Domains:
                if sub in url:
                    Br = Brute(url)
                    res = Br.substart()
                    res = list(set(res))
                    if res != []:
                        if len(res) > 150:
                            for r in res:
                                print('[+ URL Universal] Wildcard-resolved URL auto-filtered: {}'.format(r))
                                try:
                                    close_old_connections()
                                    BLACKURL.objects.create(
                                        url=r,
                                        ip=get_host(r),
                                        title=RequestsTitle(r),
                                        resons='Wildcard DNS auto-filter')
                                except:
                                    pass
                        else:
                            with ThreadPoolExecutor(
                                    max_workers=pool_count) as pool2:
                                result = pool2.map(Add_Data_To_Url, list(res))
        except Exception as e:
            Except_Log(stat=65, url=url + '|Child subdomain brute-force failed|', error=str(e))
    except Exception as e:
        Except_Log(stat=32, url=url + '|URL crawl error|', error=str(e))
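
The comment in Example #4 about setting get='是' in advance describes a claim-before-work pattern: pick the first URL row still marked '否' (not crawled), flip it to '是' (taken) and save immediately, so a second worker process started moments later skips that record. Below is only a minimal sketch of that pattern, assuming a Django model named URL with url and get fields; the model itself is not shown on this page.

# Sketch of the claim-before-work pattern from Example #4 (assumed Django model `URL`).
def claim_next_url():
    """Return one unprocessed URL and mark it as taken, or None if none are pending."""
    try:
        target = URL.objects.filter(get='否')[0]  # first row still marked "not crawled"
    except IndexError:
        return None
    target.get = '是'  # mark as taken *before* crawling so other workers skip it
    target.save()
    return target.url

Note that the read-then-save sequence is not atomic; the original code accepts that small race window, while wrapping the query in a transaction with select_for_update() would close it.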
Example #5
def Sub_Brute():
    for domain in Domains:
        urls = Brute(domain).start()
        if urls:
            Add_Data_To_Url(urls)