Example #1
def Add_Data_To_Url(urls):
    for url in urls:
        try:
            res = Get_Url_Info(url).get_info()
            res_url = res.get('url')
            res_title = res.get('title')
            res_power = res.get('power')
            res_server = res.get('server')
            Other_Url.objects.create(url=res_url,
                                     title=res_title,
                                     power=res_power,
                                     server=res_server)
        except Exception as e:
            print('Error code [29] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='Error code [29] {}'.format(str(e)))
        try:
            ip = get_host(url)
            if ip == '获取失败':  # sentinel returned by get_host, meaning "lookup failed"
                return
            print('URL --> {}  IP --> {}'.format(url, ip))
            URL.objects.create(url=url, ip=ip)

            test_ip = list(IP.objects.filter(ip=ip))
            if test_ip != []:
                return
            IP_Res = Get_Ip_Info(ip)
            servers = IP_Res.get_server_from_nmap(ip)
            # services mapped to ports (a dict)
            open_port = servers.keys()
            check_alive_url = []
            for port in open_port:
                check_alive_url.append('http://{}:{}'.format(ip, port))
                check_alive_url.append('https://{}:{}'.format(ip, port))
            alive_url = Get_Alive_Url(check_alive_url)
            # live web services on this IP: a list of dicts
            host_type = IP_Res.get_host_type(ip)
            # windows/linux
            area = IP_Res.get_ip_address(ip)
            # geographic location of the IP

            IP_Obj = IP()
            IP_Obj.ip = ip
            IP_Obj.servers = str(servers)
            IP_Obj.host_type = host_type
            IP_Obj.alive_urls = str(alive_url)
            IP_Obj.area = area
            try:
                print(ip, servers, host_type, area)
            except Exception as e:
                print('Error code [34] {}'.format(str(e)))
                Error_Log.objects.create(url=url,
                                         error='Error code [34] {}'.format(str(e)))
            IP_Obj.save()
        except Exception as e:
            print('Error code [30] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='Error code [30] {}'.format(str(e)))
Example #2
def Sub_Crawl():
    while 1:
        time.sleep(20)
        try:
            target_url = URL.objects.filter(get='否')[0]  # get='否' marks rows not yet crawled
            url = target_url.url
            target_url.get = '是'  # '是' = claimed, so other workers skip this row
            target_url.save()
        except Exception as e:
            time.sleep(200)
            try:
                target_url = URL.objects.filter(get='否')[0]
                url = target_url.url
                target_url.get = '是'
                target_url.save()
            except Exception as e:
                print('Error code [31] {}'.format(str(e)))
                Error_Log.objects.create(url='URL fetch failed',
                                         error='Error code [31] {}'.format(str(e)))
                return
        print(url)
        try:
            All_Urls = Crawl(url)
            print(All_Urls)
            if All_Urls:
                Sub_Domains = list(
                    set([y for x in Domains for y in All_Urls if x in y]))
                if Sub_Domains != []:
                    Add_Data_To_Url(Sub_Domains)
                Other_Domains = list(
                    set(y for y in All_Urls if not any(x in y for x in Domains)))
                if Other_Domains:
                    for url in Other_Domains:
                        print(url)
                        try:
                            res = Get_Url_Info(url).get_info()
                            res_url = res.get('url')
                            res_title = res.get('title')
                            res_power = res.get('power')
                            res_server = res.get('server')
                            Other_Url.objects.create(url=res_url,
                                                     title=res_title,
                                                     power=res_power,
                                                     server=res_server)
                        except Exception as e:
                            print('Error code [33] {}'.format(str(e)))
                            Error_Log.objects.create(
                                url=url, error='Error code [33] {}'.format(str(e)))
        except Exception as e:
            print('Error code [32] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='Error code [32] {}'.format(str(e)))
Example #3
def Run_Crawl(Domains):
    while 1:
        time.sleep(sum(random.randint(1, 20) for _ in range(5)))  # stagger workers (was five stacked sleeps)
        try:
            target_url = URL.objects.filter(get='否')[0]  # '否' = not yet crawled
            url = target_url.url
            target_url.get = '是'  # '是' = claimed
            target_url.save()
            # Flag the row up front so the next worker does not reuse the same record
        except Exception as e:
            time.sleep(600)
            # The fetch failed (no rows in the DB yet); retry once
            try:
                target_url = URL.objects.filter(get='否')[0]
                url = target_url.url
                target_url.get = '是'
                target_url.save()
            except Exception as e:
                print('Error code [31] {}'.format(str(e)))
                Error_Log.objects.create(url='URL fetch failed', error='Error code [31] {}'.format(str(e)))
                return
        try:
            All_Urls = set(Crawl(url))
            Other_Domains = []
            if list(All_Urls) != []:
                try:
                    Sub_Domains1 = set([y for x in Domains for y in All_Urls if x in y])
                    if list(Sub_Domains1) != []:
                        with ProcessPoolExecutor(max_workers=pool_count) as pool:
                            result = pool.map(Add_Data_To_Url, list(Sub_Domains1))
                    Other_Domains = list(All_Urls-Sub_Domains1)
                except Exception as e:
                    print('Error code [11] {}'.format(str(e)))
                    Error_Log.objects.create(url=url, error='Error code [11] {}'.format(str(e)))
                if Other_Domains != []:
                    for urle in Other_Domains:
                        try:
                            Test_Other_Url = list(Other_Url.objects.filter(url=urle))
                            if Test_Other_Url == []:
                                ip = get_host(urle)
                                res = Get_Url_Info(urle).get_info()
                                res_url = res.get('url')
                                res_title = res.get('title')
                                res_power = res.get('power')
                                res_server = res.get('server')
                                status = res.get('status')
                                res_ip = ip
                                if int(status) in Alive_Status:
                                    Other_Url.objects.create(url=res_url, title=res_title, power=res_power, server=res_server,status=status,ip=res_ip)
                        except Exception as e:
                            print('Error code [33] {}'.format(str(e)))
                            Error_Log.objects.create(url=url, error='Error code [33] {}'.format(str(e)))
        except Exception as e:
            print('Error code [32] {}'.format(str(e)))
            Error_Log.objects.create(url=url, error='Error code [32] {}'.format(str(e)))
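
ProcessPoolExecutor forks worker processes, and Django DB connections generally do not survive a fork; pool.map also hides worker exceptions until the results are iterated. A sketch of a safer dispatch for the Sub_Domains1 batch, reusing Add_Data_To_Url and pool_count from the example (the helper names are assumptions):

from concurrent.futures import ProcessPoolExecutor, as_completed
from django.db import connections

def _reset_connections():
    # Runs once per worker process so each fork opens its own DB connection.
    connections.close_all()

def dispatch(urls):
    with ProcessPoolExecutor(max_workers=pool_count,
                             initializer=_reset_connections) as pool:
        futures = {pool.submit(Add_Data_To_Url, u): u for u in urls}
        for fut in as_completed(futures):
            try:
                fut.result()  # surfaces worker errors instead of dropping them
            except Exception as e:
                print('worker failed for {}: {}'.format(futures[fut], e))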
Example #4
def Add_Data_To_Url(url):
    # close_old_connections()
    try:
        ip = get_host(url)
        if ip == '获取失败':  # sentinel returned by get_host, meaning "lookup failed"
            return
        # print('[+ Domain UrlIP] resolved --> {}  IP --> {}'.format(url, ip))
    #  Sem.acquire()
        try:
            test_url = list(URL.objects.filter(url=url))
        except:
            try:
                test_url = list(URL.objects.filter(url=url))
            except:
                test_url = list(URL.objects.filter(url=url))

        # Sem.release()
        # If the index table already has this URL, bail out
        if test_url != []:
            return

        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                try:
                    res_title = pymysql.escape_string(res.get('title'))
                except Exception as e:
                    res_title = 'Error'
                    Except_Log(stat=11, url=url + '|page content re-encoding failed', error=str(e))
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                try:
                    Other_Url.objects.create(url=res_url,
                                             title=res_title,
                                             power=res_power,
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
                except Exception as e:
                    Except_Log(stat=17, url=url + '|title or other fields not encodable', error=str(e))
                    Other_Url.objects.create(url=res_url,
                                             title='Error',
                                             power='Error',
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
        except Exception as e:
            Except_Log(stat=29, url=url + '|asset table error', error=str(e))
        try:
            # res = Get_Url_Info(url).get_info()
            # res_status = res.get('status')
            # Re-fetch the status code and confirm it still qualifies for storage,
            # keeping the data consistent
            # if int(res_status) not in Alive_Status:
            #     return

            # From here on, keep the data index unique and consistent
            '''
            Add the URL asset to the index table and the cleaning table here
            '''
            test_url1 = list(URL.objects.filter(url=url))
            # If the index table already has this URL, bail out

            if test_url1 == []:
                URL.objects.create(url=url, ip=ip)
                # add the URL index entry
                try:
                    try:
                        Show_contents = pymysql.escape_string(
                            Get_Url_Info(url).Requests()[0])
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url,
                                                 ip=ip,
                                                 cs=Show_cs,
                                                 content=Cont)
                    except Exception as e:
                        Except_Log(stat=4, url=url + '|foreign key insert error', error=str(e))
                        Show_contents = 'Error'
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url,
                                                 ip=ip,
                                                 cs=Show_cs,
                                                 content=Cont)
                    # store the page content for data display
                except Exception as e:
                    Except_Log(stat=8, url=url + '|foreign key insert error', error=str(e))

            This_Sub = [x for x in ALL_DOMAINS if x in url]
            # parent domain that this subdomain belongs to (defined outside the
            # if-block above so it always exists before the check below)
            try:
                # Try to refresh the aggregate counters for the domain
                if This_Sub != []:
                    Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                    counts = Other_Url.objects.filter(
                        url__contains=This_Sub[0])
                    Domain_Count.counts = str(len(counts))
                    # counts = int(Domain_Count.counts)+1
                    # Domain_Count.counts = counts
                    Domain_Count.save()
            except Exception as e:
                Except_Log(stat=15,
                           url=url + '|failed to resolve parent domain|' + str(This_Sub),
                           error=str(e))
        except Exception as e:
            Except_Log(stat=22, url=url + '|failed to add to URL index table|', error=str(e))

        test_ip = list(IP.objects.filter(ip=ip))
        # Start adding the IP, keeping IP records unified
        # Check whether the DB already has this IP; insert first, then update (avoids wasted rescans)
        if test_ip != []:
            test_ip_0 = IP.objects.filter(ip=ip)[0]
            # If the IP already exists and a scan is running or done, stop here
            if test_ip_0.get == '是' or test_ip_0.get == '中':
                return
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                try:
                    IP.objects.create(ip=ip,
                                      servers='None',
                                      host_type='None',
                                      cs=cs_name,
                                      alive_urls='None',
                                      area=area)
                    # Insert the row first; the async worker fills in details for the next stage.
                    # C-segment port scanning was meant to happen here, but that would push a
                    # slow operation into this synchronous path, so open-port scanning moved
                    # into the IP-detail worker thread instead.
                except Exception as e:
                    Except_Log(stat=86, url=url + '|failed to encode IP region|', error=str(e))
                    IP.objects.create(ip=ip,
                                      servers='None',
                                      host_type='None',
                                      cs=cs_name,
                                      alive_urls='None',
                                      area='获取失败')  # sentinel: "lookup failed"

            except Exception as e:
                Except_Log(stat=21, url=url + '|failed to add IP resource|', error=str(e))

    except Exception as e:
        Except_Log(stat=30, url=url + '|failed to process incoming URL|', error=str(e))
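
Each filter-then-create pair above (URL, Other_Url, IP) is a read-then-write race between concurrent workers. Django's get_or_create collapses the check and the insert into one call; a sketch against the same models, with field names taken from the example and the assumption that the lookup columns carry unique constraints (without them the call is not race-free):

def upsert_url(url, ip):
    # Returns True only if this worker actually inserted the row.
    obj, created = URL.objects.get_or_create(url=url, defaults={'ip': ip})
    return created

def upsert_ip(ip, cs_name, area):
    return IP.objects.get_or_create(
        ip=ip,
        defaults={'servers': 'None', 'host_type': 'None', 'cs': cs_name,
                  'alive_urls': 'None', 'area': area})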
Example #5
def Run_Crawl(Domains):
    Domains = ['.' + str(x) for x in Domains]
    # close_old_connections()
    time.sleep(sum(random.randint(1, 20) for _ in range(3)))  # stagger workers (was three stacked sleeps)
    try:
        target_url = URL.objects.filter(get='否')[0]
        url = target_url.url
        target_url.get = '是'
        target_url.save()
        # Flag the row up front so the next worker does not reuse the same record
    except Exception as e:
        time.sleep(600)
        # The fetch failed (no rows in the DB yet); retry once
        try:
            target_url0 = URL.objects.filter(get='否')[0]
            url = target_url0.url
            target_url0.get = '是'
            target_url0.save()
        except Exception as e:
            Except_Log(stat=31, url='|failed to fetch URL and set scan status|', error=str(e))
            return
    try:
        All_Urls = Crawl(url)
        if All_Urls == []:
            return
        All_Urls = set(All_Urls)
        Other_Domains = []
        if All_Urls:
            try:
                Sub_Domains1 = set(
                    [y for x in Domains for y in All_Urls if x in y])
                if list(Sub_Domains1) != []:
                    with ThreadPoolExecutor(max_workers=pool_count) as pool1:
                        result = pool1.map(Add_Data_To_Url, list(Sub_Domains1))
                Other_Domains = list(All_Urls - Sub_Domains1)
            except Exception as e:
                Except_Log(stat=11, url='|URL fetch failed|', error=str(e))
            if Other_Domains:
                try:
                    for urle in Other_Domains:
                        try:
                            try:
                                Test_Other_Url = list(
                                    Other_Url.objects.filter(url=urle))
                            except:
                                Test_Other_Url = list(
                                    Other_Url.objects.filter(url=urle))
                            if Test_Other_Url == []:
                                ip = get_host(urle)
                                res = Get_Url_Info(urle).get_info()
                                res_url = res.get('url')
                                try:
                                    res_title = pymysql.escape_string(
                                        res.get('title'))
                                except:
                                    res_title = 'Error'
                                res_power = res.get('power')
                                res_server = res.get('server')
                                status = res.get('status')
                                res_ip = ip
                                #if int(status) in Alive_Status:
                                try:
                                    Other_Url.objects.create(url=res_url,
                                                             title=res_title,
                                                             power=res_power,
                                                             server=res_server,
                                                             status=status,
                                                             ip=res_ip)
                                except Exception as e:
                                    Except_Log(stat=33,
                                               url=url + '|asset crawl error|',
                                               error=str(e))
                                    Other_Url.objects.create(url=res_url,
                                                             title='Error',
                                                             power=res_power,
                                                             server=res_server,
                                                             status=status,
                                                             ip=res_ip)
                        except Exception as e:
                            Except_Log(stat=37,
                                       url=url + '|asset crawl error|',
                                       error=str(e))
                except Exception as e:
                    Except_Log(stat=36, url=url + '|asset crawl error|', error=str(e))

    except Exception as e:
        Except_Log(stat=32, url=url + '|URL crawl error|', error=str(e))
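
The variants stagger worker start-up with sums of random sleeps. A small named helper states the intent directly; the bounds are illustrative:

import random
import time

def jitter(min_s=5, max_s=60):
    # One uniformly distributed delay instead of several stacked sleeps.
    delay = random.uniform(min_s, max_s)
    time.sleep(delay)
    return delay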
Example #6
def Run_Crawl(Domains):
    Domains = ['.' + str(x) for x in Domains]
    time.sleep(sum(random.randint(10, 20) for _ in range(9)))  # stagger workers (was nine stacked sleeps)
    '''
    2019-12-23
    Domains now carry a monitored/unmonitored flag, so the logic here needed review:
    1. If this URL was scanned before,
    2. the URL index table already holds it.
    3. If monitoring of its domain is paused midway,
    4. the fixed fetch pattern above still returns the same rows,
    5. so a check would be needed.
    6. But the domain may later be flipped back to monitored,
    7. which gives a double-flip logic.
    Conclusion: site A may expose subdomains of B that B's own pages never reveal,
    so crawling a few extra times does no harm; skipping the crawl is not recommended,
    and therefore nothing is changed here.
    for subd in ALL_DOMAINS:
        if subd in url:
            ins = True
            target_url.get = '是'
            # Flag the row up front so the next worker does not reuse the same record
            target_url.save()
    if ins == False:
        target_url.get = '空'
        target_url.save()
        return
    '''
    try:
        target_url = URL.objects.filter(get='否')[0]
        url = target_url.url
        target_url.get = '是'
        target_url.save()
        # Flag the row up front so the next worker does not reuse the same record
    except Exception as e:
        Except_Log(stat=31, url='|failed to fetch URL and set scan status|', error='failed to fetch a URL to crawl')
        # The fetch failed (no rows in the DB yet); back off, reset, and return
        time.sleep(600)
        ResetCrawl(db=Dbname)
        return

    try:
        All_Urls = Crawl(url)
        if All_Urls != []:
            All_Urls = set(All_Urls)
            Other_Domains = []
            if All_Urls:
                try:
                    Sub_Domains1 = set(
                        [y for x in Domains for y in All_Urls if x in y])
                    if list(Sub_Domains1) != []:
                        with ThreadPoolExecutor(
                                max_workers=pool_count) as pool1:
                            result = pool1.map(Add_Data_To_Url,
                                               list(Sub_Domains1))
                    Other_Domains = list(All_Urls - Sub_Domains1)
                except Exception as e:
                    Except_Log(stat=11, url='|URL fetch failed|', error=str(e))

                if Other_Domains:
                    try:
                        for urle in Other_Domains:
                            if '.gov.cn' not in urle and '.edu.cn' not in urle:
                                try:
                                    try:
                                        Test_Other_Url = list(
                                            Other_Url.objects.filter(url=urle))
                                    except:
                                        close_old_connections()
                                        Test_Other_Url = list(
                                            Other_Url.objects.filter(url=urle))
                                    if Test_Other_Url == []:
                                        ip = get_host(urle)
                                        res = Get_Url_Info(urle).get_info()
                                        res_url = res.get('url')
                                        try:
                                            res_title = pymysql.escape_string(
                                                res.get('title'))
                                        except:
                                            res_title = 'Error'
                                        res_power = res.get('power')
                                        res_server = res.get('server')
                                        status = res.get('status')
                                        res_ip = ip
                                        #if int(status) in Alive_Status:
                                        try:
                                            Other_Url.objects.create(
                                                url=res_url,
                                                title=res_title,
                                                power=res_power,
                                                server=res_server,
                                                status=status,
                                                ip=res_ip)
                                        except Exception as e:
                                            Except_Log(stat=33,
                                                       url=url + '|asset crawl error|',
                                                       error=str(e))
                                            close_old_connections()
                                            Other_Url.objects.create(
                                                url=res_url,
                                                title='Error',
                                                power=res_power,
                                                server=res_server,
                                                status=status,
                                                ip=res_ip)
                                except Exception as e:
                                    Except_Log(stat=37,
                                               url=url + '|asset crawl error|',
                                               error=str(e))
                    except Exception as e:
                        Except_Log(stat=36, url=url + '|asset crawl error|', error=str(e))
        try:
            '''
            2019-12-23
            The crawl above stays as it was, but when brute-forcing the next level of
            subdomains there is no point covering domains that are no longer monitored.
            '''
            for sub in Domains:
                if sub in url:
                    Br = Brute(url)
                    res = Br.substart()
                    res = list(set(res))
                    if res != []:
                        if len(res) > 150:
                            for r in res:
                                print('[+ URL Universal] wildcard-DNS URL auto-filtered : {}'.format(r))
                                try:
                                    close_old_connections()
                                    BLACKURL.objects.create(
                                        url=r,
                                        ip=get_host(r),
                                        title=RequestsTitle(r),
                                        resons='wildcard-DNS auto-filter')
                                except:
                                    pass
                        else:
                            with ThreadPoolExecutor(
                                    max_workers=pool_count) as pool2:
                                result = pool2.map(Add_Data_To_Url, list(res))
        except Exception as e:
            Except_Log(stat=65, url=url + '|next-level subdomain brute-force failed|', error=str(e))
    except Exception as e:
        Except_Log(stat=32, url=url + '|URL crawl error|', error=str(e))
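
When a wildcard domain yields more than 150 resolutions, the loop above inserts BLACKURL rows one at a time, each with its own get_host/RequestsTitle round-trip. A sketch that batches the inserts with bulk_create, assuming Django >= 2.2 for ignore_conflicts and a unique constraint on BLACKURL.url so existing rows are skipped:

def blacklist_wildcards(urls, reason='wildcard-DNS auto-filter'):
    rows = [BLACKURL(url=u,
                     ip=get_host(u),
                     title=RequestsTitle(u),
                     resons=reason)
            for u in urls]
    # ignore_conflicts skips rows that already violate the unique constraint
    BLACKURL.objects.bulk_create(rows, ignore_conflicts=True)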
Example #7
def Add_Data_To_Url(url):
    '''
    2019-12-10
        1. Filters the incoming URL against the IP blacklist.
        2. Handles asset-table insertion, URL-index insertion, host-asset insertion,
           and monitored-domain counter updates for the URL.
    2020-01-14
        1. Added wildcard-DNS filtering rules.
    '''
    time.sleep(sum(random.randint(5, 20) for _ in range(2)))  # stagger workers (was two stacked sleeps)
    close_old_connections()
    urlhasdomain = check_black(url, ALL_DOMAINS)
    if urlhasdomain == False:
        print('[+ Insert Url] URL is outside the monitored domains : {}'.format(url))
        try:
            close_old_connections()
            BLACKURL.objects.create(url=url,
                                    ip=get_host(url),
                                    title=RequestsTitle(url),
                                    resons='URL outside the monitored domain scope')
            return
        except:
            close_old_connections()
            return
    if '.gov.cn' in url or '.edu.cn' in url:
        return
    urlinblackurl = check_black(url, black_url)
    if urlinblackurl == True:
        print('[+ URL Blacklist] URL hit the blacklist : {}'.format(url))
        try:
            burl = ''
            for blacurl in black_url:
                if blacurl in url:
                    burl = blacurl
            close_old_connections()
            BLACKURL.objects.create(url=url,
                                    ip=get_host(url),
                                    title=RequestsTitle(url),
                                    resons='hit URL blacklist: {}'.format(burl))
        except Exception as e:
            pass
        return

    try:
        ip = get_host(url)
        if ip == '获取失败':  # sentinel returned by get_host, meaning "lookup failed"
            try:
                BLACKURL.objects.create(url=url,
                                        ip=get_host(url),
                                        title=RequestsTitle(url),
                                        resons='failed to resolve the URL IP')
            except Exception as e:
                pass
            return
        if ip in black_ip:
            '''IP blacklist triggered'''
            print('[+ IP Blacklist] IP hit the blacklist : {} --> {}'.format(ip, url))
            try:
                BLACKURL.objects.create(url=url,
                                        ip=get_host(url),
                                        title=RequestsTitle(url),
                                        resons='hit IP blacklist: {}'.format(ip))
            except Exception as e:
                pass
            return

        try:
            test_url = list(URL.objects.filter(url=url))
        except:
            try:
                test_url = list(URL.objects.filter(url=url))
            except:
                close_old_connections()
                test_url = list(URL.objects.filter(url=url))

        if test_url != []:
            '''If the URL index table already has this URL, bail out'''
            return
        '''
        2020-01-14
        1. Compare against the wildcard-DNS baseline to decide whether this URL is a wildcard record.
        2. Fetch the baseline title, IP, and page content for comparison.
        3. Compare titles first: matching titles suggest it is not wildcard DNS. (Note: some large
           sites swap in a captcha page title when requests come too fast.)
        4. Then compare page contents; high similarity points to wildcard DNS.
        5. Why not just compare IPs? Because e.g. xxadasda.yy.com and aedqwawrqw668.sdada.yy.com
           are clearly both wildcard records, yet they can resolve to different IPs.
        '''
        infjx = [x for x in ALL_DOMAINS if x in url]
        if infjx == []:
            return
        else:
            infjx = infjx[0]
        inftitle, infip, infcontent = DOMAINSINFOS[infjx][
            'title'], DOMAINSINFOS[infjx]['ip'], DOMAINSINFOS[infjx]['content']
        DD = Get_Url_Info(url).get_info()
        comtitle, comip, comcontent = DD['title'], DD['ip'], DD['content']
        # if inftitle != comtitle:
        #     # Different titles suggest it is not wildcard DNS (roughly 80% accurate),
        #     # but for a site like Anjuke this check is useless
        #     pass
        # else:
        if infcontent != 'Error' and comcontent != 'Error':
            if Return_Content_Difflib(infcontent, comcontent) == True:
                try:
                    print('[+ URL Universal] wildcard-DNS URL auto-filtered : {}'.format(url))
                    close_old_connections()
                    BLACKURL.objects.create(url=url,
                                            ip=get_host(url),
                                            title=RequestsTitle(url),
                                            resons='wildcard-DNS auto-filter')
                    return
                except:
                    return
        else:
            DD1 = Get_Url_Info(url.replace('://', '://yyyyyyyyy')).get_info()
            comtitle1, comip1, comcontent1 = DD1['title'], DD1['ip'], DD1[
                'content']
            if Return_Content_Difflib(comcontent, comcontent1) == True:
                try:
                    print('[+ URL Universal] wildcard-DNS URL auto-filtered : {}'.format(url))
                    close_old_connections()
                    BLACKURL.objects.create(url=url,
                                            ip=get_host(url),
                                            title=RequestsTitle(url),
                                            resons='wildcard-DNS auto-filter')
                    return
                except:
                    return

        print('[+ Insert Url] storing URL : {}'.format(url))
        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            '''If the asset table lacks this record, add it'''
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                try:
                    res_title = pymysql.escape_string(res.get('title'))
                except Exception as e:
                    res_title = 'Error'
                    Except_Log(stat=11, url=url + '|page content re-encoding failed', error=str(e))
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                try:
                    Other_Url.objects.create(url=res_url,
                                             title=res_title,
                                             power=res_power,
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
                except Exception as e:
                    close_old_connections()
                    Except_Log(stat=17, url=url + '|title or other fields not encodable', error=str(e))
                    Other_Url.objects.create(url=res_url,
                                             title='Error',
                                             power='Error',
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
        except Exception as e:
            Except_Log(stat=29, url=url + '|asset table error', error=str(e))
        try:
            '''
            Re-fetch the status code and confirm it still qualifies for storage, keeping data consistent.
            Add the URL asset to the index table and the cleaning table here.
            '''
            test_url1 = list(URL.objects.filter(url=url))
            '''If the URL index table already has this URL, bail out'''
            if test_url1 == []:
                URL.objects.create(url=url, ip=ip)
                '''Add the URL to the URL index table'''
                try:
                    try:
                        ZHRND = Get_Url_Info(url)
                        Sconten = ZHRND.get_info()['content']
                        if Sconten == 'Error':
                            '''Reaching here means fetching the page content failed'''
                            # print('{}: failed to fetch page content'.format(url))
                            pass
                        else:
                            try:
                                blackconincon = check_black(Sconten, black_con)
                                if blackconincon == True:
                                    '''Page-content blacklist triggered'''
                                    burl = ''
                                    for blacurl in black_con:
                                        if blacurl in Sconten:
                                            burl = blacurl
                                    print('[+ Cont Blacklist] page content hit the blacklist : {}'.format(url))
                                    try:
                                        close_old_connections()
                                        BLACKURL.objects.create(
                                            url=url,
                                            ip=get_host(url),
                                            title=RequestsTitle(url),
                                            resons='hit page-content blacklist: {}'.format(burl))
                                    except Exception as e:
                                        pass
                                    return
                            except:
                                Sconten = '获取失败'  # sentinel: "fetch failed"
                        Show_contents = pymysql.escape_string(Sconten)
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url,
                                                 ip=ip,
                                                 cs=Show_cs,
                                                 content=Cont)
                    except Exception as e:
                        close_old_connections()
                        Except_Log(stat=4, url=url + '|foreign key insert error', error=str(e))
                        Show_contents = 'Error'
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url,
                                                 ip=ip,
                                                 cs=Show_cs,
                                                 content=Cont)
                        '''Store the page content for data display'''
                except Exception as e:
                    Except_Log(stat=8, url=url + '|foreign key insert error', error=str(e))

            This_Sub = [x for x in ALL_DOMAINS if x in url]
            '''Parent domain that this subdomain belongs to'''
            try:
                '''Try to refresh the aggregate counters for the domain'''
                if This_Sub != []:
                    Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                    counts = Other_Url.objects.filter(
                        url__contains=This_Sub[0])
                    Domain_Count.counts = str(len(counts))
                    # counts = int(Domain_Count.counts)+1
                    # Domain_Count.counts = counts
                    Domain_Count.save()
            except Exception as e:
                Except_Log(stat=15,
                           url=url + '|failed to resolve parent domain|' + str(This_Sub),
                           error=str(e))
        except Exception as e:
            Except_Log(stat=22, url=url + '|failed to add to URL index table|', error=str(e))

        try:
            test_ip = list(IP.objects.filter(ip=ip))
        except:
            close_old_connections()
            test_ip = list(IP.objects.filter(ip=ip))
            '''Start adding the IP, keeping IP records unified.
            Check whether the DB already has this IP; insert first, then update (avoids wasted rescans).
            if test_ip != []:
                test_ip_0 = IP.objects.filter(ip=ip)[0]
                # If the IP exists and a scan is running or done, stop here
                if test_ip_0.get == '是' or test_ip_0.get == '中':
                    return'''
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                try:
                    IP.objects.create(ip=ip,
                                      servers='None',
                                      host_type='None',
                                      cs=cs_name,
                                      alive_urls='None',
                                      area=area)
                    '''Insert the row first; the async worker fills in details for the next stage.
                    C-segment port scanning was meant to happen here, but that would push a slow
                    operation into this synchronous path, so it moved to the IP-detail worker thread.'''
                except Exception as e:
                    Except_Log(stat=86, url=url + '|failed to encode IP region|', error=str(e))
                    IP.objects.create(ip=ip,
                                      servers='None',
                                      host_type='None',
                                      cs=cs_name,
                                      alive_urls='None',
                                      area='获取失败')  # sentinel: "lookup failed"

            except Exception as e:
                Except_Log(stat=21, url=url + '|failed to add IP resource|', error=str(e))

    except Exception as e:
        Except_Log(stat=30, url=url + '|failed to process incoming URL|', error=str(e))
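
Return_Content_Difflib is not shown in these excerpts; a plausible sketch of such a similarity check using the standard library's difflib, where the 0.9 threshold is an assumed tuning value:

import difflib

def Return_Content_Difflib(content_a, content_b, threshold=0.9):
    # True when the two HTML bodies are near-identical; for a random subdomain
    # this strongly suggests a wildcard-DNS catch-all page.
    ratio = difflib.SequenceMatcher(None, content_a, content_b).quick_ratio()
    return ratio >= threshold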
Example #8
def Add_Data_To_Url(url):
    '''
    2019-12-10
        1. Filters the incoming URL against the IP blacklist.
        2. Handles asset-table insertion, URL-index insertion, host-asset insertion,
           and monitored-domain counter updates for the URL.
    '''
    time.sleep(sum(random.randint(5, 20) for _ in range(3)))  # stagger workers (was three stacked sleeps)
    close_old_connections()
    print('[+ Insert Url] storing URL : {}'.format(url))
    if '.gov.cn' in url or '.edu.cn' in url:
        return
    urlinblackurl = check_black(url, black_url)
    if urlinblackurl == True:
        print('[+ URL Blacklist] URL hit the blacklist : {}'.format(url))
        return
    try:
        ip = get_host(url)
        if ip == '获取失败':  # sentinel returned by get_host, meaning "lookup failed"
            return
        if ip in black_ip:
            '''IP blacklist triggered'''
            print('[+ IP Blacklist] IP hit the blacklist : {} --> {}'.format(ip, url))
            return
        try:
            test_url = list(URL.objects.filter(url=url))
        except:
            try:
                test_url = list(URL.objects.filter(url=url))
            except:
                close_old_connections()
                test_url = list(URL.objects.filter(url=url))

        if test_url != []:
            '''If the URL index table already has this URL, bail out'''
            return

        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            '''If the asset table lacks this record, add it'''
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                try:
                    res_title = pymysql.escape_string(res.get('title'))
                except Exception as e:
                    res_title = 'Error'
                    Except_Log(stat=11, url=url + '|page content re-encoding failed', error=str(e))
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                try:
                    Other_Url.objects.create(url=res_url,
                                             title=res_title,
                                             power=res_power,
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
                except Exception as e:
                    close_old_connections()
                    Except_Log(stat=17, url=url + '|title or other fields not encodable', error=str(e))
                    Other_Url.objects.create(url=res_url,
                                             title='Error',
                                             power='Error',
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
        except Exception as e:
            Except_Log(stat=29, url=url + '|asset table error', error=str(e))
        try:
            '''
            Re-fetch the status code and confirm it still qualifies for storage, keeping data consistent.
            Add the URL asset to the index table and the cleaning table here.
            '''
            test_url1 = list(URL.objects.filter(url=url))
            '''If the URL index table already has this URL, bail out'''
            if test_url1 == []:
                URL.objects.create(url=url, ip=ip)
                '''Add the URL to the URL index table'''
                try:
                    try:
                        ZHRND = Get_Url_Info(url)
                        Sconten = ZHRND.get_info()['content']
                        if Sconten == 'Error':
                            '''Reaching here means fetching the page content failed'''
                            # print('{}: failed to fetch page content'.format(url))
                            pass
                        else:
                            try:
                                blackconincon = check_black(Sconten, black_con)
                                if blackconincon == True:
                                    '''Page-content blacklist triggered'''
                                    print('[+ Cont Blacklist] page content hit the blacklist : {}'.format(url))
                                    return None
                            except:
                                Sconten = '获取失败'  # sentinel: "fetch failed"
                        Show_contents = pymysql.escape_string(Sconten)
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url,
                                                 ip=ip,
                                                 cs=Show_cs,
                                                 content=Cont)
                    except Exception as e:
                        close_old_connections()
                        Except_Log(stat=4, url=url + '|foreign key insert error', error=str(e))
                        Show_contents = 'Error'
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url,
                                                 ip=ip,
                                                 cs=Show_cs,
                                                 content=Cont)
                        '''Store the page content for data display'''
                except Exception as e:
                    Except_Log(stat=8, url=url + '|foreign key insert error', error=str(e))

            This_Sub = [x for x in ALL_DOMAINS if x in url]
            '''Parent domain that this subdomain belongs to'''
            try:
                '''Try to refresh the aggregate counters for the domain'''
                if This_Sub != []:
                    Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                    counts = Other_Url.objects.filter(
                        url__contains=This_Sub[0])
                    Domain_Count.counts = str(len(counts))
                    # counts = int(Domain_Count.counts)+1
                    # Domain_Count.counts = counts
                    Domain_Count.save()
            except Exception as e:
                Except_Log(stat=15,
                           url=url + '|failed to resolve parent domain|' + str(This_Sub),
                           error=str(e))
        except Exception as e:
            Except_Log(stat=22, url=url + '|failed to add to URL index table|', error=str(e))

        try:
            test_ip = list(IP.objects.filter(ip=ip))
        except:
            close_old_connections()
            test_ip = list(IP.objects.filter(ip=ip))
            '''Start adding the IP, keeping IP records unified.
            Check whether the DB already has this IP; insert first, then update (avoids wasted rescans).
            if test_ip != []:
                test_ip_0 = IP.objects.filter(ip=ip)[0]
                # If the IP exists and a scan is running or done, stop here
                if test_ip_0.get == '是' or test_ip_0.get == '中':
                    return'''
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                try:
                    IP.objects.create(ip=ip,
                                      servers='None',
                                      host_type='None',
                                      cs=cs_name,
                                      alive_urls='None',
                                      area=area)
                    '''Insert the row first; the async worker fills in details for the next stage.
                    C-segment port scanning was meant to happen here, but that would push a slow
                    operation into this synchronous path, so it moved to the IP-detail worker thread.'''
                except Exception as e:
                    Except_Log(stat=86, url=url + '|failed to encode IP region|', error=str(e))
                    IP.objects.create(ip=ip,
                                      servers='None',
                                      host_type='None',
                                      cs=cs_name,
                                      alive_urls='None',
                                      area='获取失败')  # sentinel: "lookup failed"

            except Exception as e:
                Except_Log(stat=21, url=url + '|failed to add IP resource|', error=str(e))

    except Exception as e:
        Except_Log(stat=30, url=url + '|failed to process incoming URL|', error=str(e))
        Add_Data_To_Url(url)  # NOTE: unbounded recursion if the failure persists
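
The tail call in the final except block retries forever if the failure persists and deepens the stack on every attempt. A sketch of a bounded retry wrapper, assuming Add_Data_To_Url were changed to re-raise instead of recursing; the attempt count is an arbitrary choice:

def add_data_with_retry(url, attempts=3):
    for attempt in range(1, attempts + 1):
        try:
            Add_Data_To_Url(url)
            return True
        except Exception as e:
            Except_Log(stat=30, url=url + '|failed to process incoming URL|', error=str(e))
    return False  # give up after the last attempt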
Example #9
File: main.py Project: JmNkS/LangSrcCurise
def Add_Data_To_Url(url):
    try:
        ip = get_host(url)
        if ip == '获取失败':  # sentinel returned by get_host, meaning "lookup failed"
            return
        # print('[+ Domain UrlIP] resolved --> {}  IP --> {}'.format(url, ip))
        test_url = list(URL.objects.filter(url=url))
        # If the index table already has this URL, bail out
        if test_url != []:
            return

        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                res_title = res.get('title')
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                if int(res_status) in Alive_Status:
                    # only store rows whose status code is in the allowed set
                    Other_Url.objects.create(url=res_url,
                                             title=res_title,
                                             power=res_power,
                                             server=res_server,
                                             status=res_status,
                                             ip=res_ip)
        except Exception as e:
            print('Error code [29] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='Error code [29] {}'.format(str(e)))

        try:
            res = Get_Url_Info(url).get_info()
            res_status = res.get('status')
            # Re-fetch the status code and confirm it still qualifies for storage, keeping data consistent
            if int(res_status) not in Alive_Status:
                return

            # From here on, keep the data index unique and consistent

            URL.objects.create(url=url, ip=ip)
            # add the URL index entry
            try:
                Show_contents = Get_Url_Info(url).Requests()[0]
                Cont = Content()
                Cont.url = url
                Cont.content = Show_contents
                Cont.save()
                Show_Data.objects.create(url=url, ip=ip, content=Cont)
                # store the page content for data display
            except Exception as e:
                print('Error code [08] {}'.format(str(e)))
                Error_Log.objects.create(url='foreign key insert error',
                                         error='Error code [08] {}'.format(str(e)))

            BA = Domains.objects.all()
            ALL_DOMAINS = [x.get('url') for x in BA.values()]
            # all monitored domains
            # print('all domains: {}'.format(ALL_DOMAINS))
            This_Sub = [x for x in ALL_DOMAINS if x in url]
            # parent domain that this subdomain belongs to

            try:
                # Try to refresh the aggregate counters for the domain
                if This_Sub != []:
                    Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                    counts = Other_Url.objects.filter(
                        url__contains=This_Sub[0])
                    Domain_Count.counts = str(len(counts))
                    # counts = int(Domain_Count.counts)+1
                    # Domain_Count.counts = counts
                    Domain_Count.save()
            except Exception as e:
                print('Error code [15] {}'.format(str(e)))
                Error_Log.objects.create(url=url + '|' + str(This_Sub),
                                         error='Error code [15] {}'.format(str(e)))
        except Exception as e:
            print('Error code [22] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='Error code [22] {}'.format(str(e)))

        test_ip = list(IP.objects.filter(ip=ip))
        # Start adding the IP, keeping IP records unified
        # Check whether the DB already has this IP; insert first, then update (avoids wasted rescans)
        if test_ip != []:
            test_ip_0 = IP.objects.filter(ip=ip)[0]
            # If the IP already exists and a scan is running or done, stop here
            if test_ip_0.get == '是' or test_ip_0.get == '中':
                return
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                IP.objects.create(ip=ip,
                                  servers='None',
                                  host_type='None',
                                  alive_urls='None',
                                  area=area)
                # Insert the row first; the async worker fills in details for the next stage
            except Exception as e:
                print('Error code [21] {}'.format(str(e)))
                Error_Log.objects.create(url=url,
                                         error='Error code [21] {}'.format(str(e)))
    except Exception as e:
        print('Error code [30] {}'.format(str(e)))
        Error_Log.objects.create(url=url, error='Error code [30] {}'.format(str(e)))
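
Alive_Status is referenced but never defined in these excerpts; it is presumably a collection of HTTP status codes that count as "alive". A sketch with an assumed code set and tolerant parsing, since the status arrives as a string and may be missing:

ALIVE_STATUS = {200, 301, 302, 403}  # assumed set of "alive" HTTP codes

def is_alive(status):
    try:
        return int(status) in ALIVE_STATUS
    except (TypeError, ValueError):
        return False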
Example #10
def Add_Data_To_Url(url):
    time.sleep(sum(random.randint(1, 20) for _ in range(7)))  # stagger workers (was seven stacked sleeps)
    try:
        ip = get_host(url)
        if ip == '获取失败':  # sentinel returned by get_host, meaning "lookup failed"
            return
        # print('[+ Domain UrlIP] resolved --> {}  IP --> {}'.format(url, ip))
        test_url = list(URL.objects.filter(url=url))
        # If the index table already has this URL, bail out
        if test_url != []:
            return

        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                res_title = res.get('title')
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                #if int(res_status) in Alive_Status:
                #    # only store rows whose status code is in the allowed set
                Other_Url.objects.create(url=res_url, title=res_title, power=res_power, server=res_server,
                                         status=res_status, ip=res_ip)

        except Exception as e:
            print('Error code [29] {}'.format(str(e)))
            Error_Log.objects.create(url=url, error='Error code [29] {}'.format(str(e)))

        try:
            # res = Get_Url_Info(url).get_info()
            # res_status = res.get('status')
            # Re-fetch the status code and confirm it still qualifies for storage, keeping data consistent
            # if int(res_status) not in Alive_Status:
            #     return

            # From here on, keep the data index unique and consistent
            test_url1 = list(URL.objects.filter(url=url))
            # If the index table already has this URL, bail out

            if test_url1 == []:
                URL.objects.create(url=url,ip=ip)
                # add the URL index entry
                try:
                    Show_contents = Get_Url_Info(url).Requests()[0]
                    Cont = Content()
                    Cont.url = url
                    Cont.content = Show_contents
                    IP_Res = Get_Ip_Info(ip)
                    Show_cs = IP_Res.get_cs_name(ip)
                    Cont.save()
                    Show_Data.objects.create(url=url, ip=ip,cs=Show_cs, content=Cont)
                    # store the page content for data display
                except Exception as e:
                    print('Error code [08] {}'.format(str(e)))
                    Error_Log.objects.create(url='foreign key insert error', error='Error code [08] {}'.format(str(e)))

            BA = Domains.objects.all()
            ALL_DOMAINS = [x.get('url') for x in BA.values()]
            # all monitored domains
            # print('all domains: {}'.format(ALL_DOMAINS))
            This_Sub = [x for x in ALL_DOMAINS if x in url]
            # parent domain that this subdomain belongs to

            try:
                # Try to refresh the aggregate counters for the domain
                if This_Sub != []:
                    Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                    counts = Other_Url.objects.filter(url__contains=This_Sub[0])
                    Domain_Count.counts = str(len(counts))
                    # counts = int(Domain_Count.counts)+1
                    # Domain_Count.counts = counts
                    Domain_Count.save()
            except Exception as e:
                print('Error code [15] {}'.format(str(e)))
                Error_Log.objects.create(url=url + '|' + str(This_Sub), error='Error code [15] {}'.format(str(e)))
        except Exception as e:
            print('Error code [22] {}'.format(str(e)))
            Error_Log.objects.create(url=url, error='Error code [22] {}'.format(str(e)))

        test_ip = list(IP.objects.filter(ip=ip))
        # Start adding the IP, keeping IP records unified
        # Check whether the DB already has this IP; insert first, then update (avoids wasted rescans)
        if test_ip != []:
            test_ip_0 = IP.objects.filter(ip=ip)[0]
            # If the IP already exists and a scan is running or done, stop here
            if test_ip_0.get == '是' or test_ip_0.get == '中':
                return
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                IP.objects.create(ip=ip, servers='None', host_type='None', cs=cs_name,alive_urls='None', area=area)
                # Insert the row first; the async worker fills in details for the next stage

                cs_ips = [str(x) for x in list(IP_Res.get_cs_ips(ip).values())[0]]
                # every IP in this C segment
                if ip in cs_ips:
                    cs_ips.remove(ip)

                Read_to_check_host = set()
                for cs_ip in cs_ips:
                    indata = list(IP.objects.filter(ip=str(cs_ip)))
                    if indata == [] and cs_ip != ip:
                        Read_to_check_host.add(cs_ip)

                Alive_Hosts = IP_Res.get_alive_hosts(Read_to_check_host)
                print('[+ CHost Scaner] {} segment live hosts : {}'.format(cs_name, len(Alive_Hosts)))
                if Alive_Hosts == []:
                    return
                for alive_host in Alive_Hosts:
                    try:
                        checkindata = list(IP.objects.filter(ip=str(alive_host)))
                        if checkindata == [] :
                            # final dedup check before inserting
                            c_ip = str(alive_host)
                            c_cs = cs_name
                            c_area = IP_Res.get_ip_address(c_ip)
                            IP.objects.create(ip=c_ip, servers='None', host_type='None', cs=c_cs, alive_urls='None',
                                              area=c_area)
                    except Exception as e:
                        print('Error code [03] {}'.format(str(e)))
                        Error_Log.objects.create(url=url, error='Error code [03] {}'.format(str(e)))

            except Exception as e:
                print('Error code [21] {}'.format(str(e)))
                Error_Log.objects.create(url=url, error='Error code [21] {}'.format(str(e)))
    except Exception as e:
        print('Error code [30] {}'.format(str(e)))
        Error_Log.objects.create(url=url, error='Error code [30] {}'.format(str(e)))
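
get_cs_ips presumably enumerates the /24 ("C segment") around the resolved IP before the liveness sweep above; a sketch using the standard library's ipaddress module:

import ipaddress

def get_cs_ips(ip):
    # All host addresses in the /24 containing this IP.
    network = ipaddress.ip_network('{}/24'.format(ip), strict=False)
    return [str(host) for host in network.hosts()]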