Exemple #1
0
def check_whois_with_dns(page: OnSiteLink):
    """
    Check a link's domain through DNS and fall back to a WHOIS check when
    neither DNS lookup succeeds.

    The link is split with ``LinkChecker.get_root_domain``. When the
    sub-domain part is empty, or the suffix is not in
    ``TldUtility.TOP_TLD_LIST``, the page is left untouched and no WHOIS
    check is made. Otherwise the page is updated in place: ``response_code``
    receives the DNS verdict, ``link_type`` becomes
    ``OnSiteLink.TypeOutbound`` and ``link`` is replaced by the root domain.

    Any ``Exception`` raised while resolving is swallowed on purpose
    (best-effort check); the page is then returned in whatever state it had
    reached and the WHOIS check is skipped.

    :param page: the link to check; may be mutated in place.
    :return: the result of ``check_whois(page)`` when both DNS lookups
        failed, otherwise the tuple ``(page.link, page.response_code)``.
    """
    real_response_code = ResponseCode.DNSError
    skip_whois_check = False
    try:
        root_result = LinkChecker.get_root_domain(page.link)
        root_domain = root_result[1]
        sub_domain = root_result[4]
        suffix = root_result[5]

        if not sub_domain or suffix not in TldUtility.TOP_TLD_LIST:
            skip_whois_check = True
        else:
            if LinkChecker.is_domain_DNS_OK(sub_domain):  # check DNS first
                real_response_code = ResponseCode.NoDNSError
                skip_whois_check = True
            elif not sub_domain.startswith("www."):
                # Second chance: the host may only resolve with a "www." prefix.
                if LinkChecker.is_domain_DNS_OK("www." + root_domain):
                    real_response_code = ResponseCode.NoDNSError
                    skip_whois_check = True

            page.response_code = real_response_code
            page.link_type = OnSiteLink.TypeOutbound
            page.link = root_domain
    except Exception:
        # Best-effort: a failed lookup must not abort the caller, it only
        # disables the WHOIS fallback for this page.
        skip_whois_check = True

    # The result is deliberately returned here and not from a ``finally``
    # clause: returning inside ``finally`` would also discard
    # KeyboardInterrupt / SystemExit, which must be allowed to propagate.
    if not skip_whois_check and real_response_code == ResponseCode.DNSError:
        return check_whois(page)
    return page.link, page.response_code
def check_whois_with_dns(page: OnSiteLink):
    """
    Check a link's domain through DNS and fall back to a WHOIS check when
    neither DNS lookup succeeds.

    The link is split with ``LinkChecker.get_root_domain``. When the
    sub-domain part is empty, or the suffix is not in
    ``TldUtility.TOP_TLD_LIST``, the page is left untouched and no WHOIS
    check is made. Otherwise the page is updated in place: ``response_code``
    receives the DNS verdict, ``link_type`` becomes
    ``OnSiteLink.TypeOutbound`` and ``link`` is replaced by the root domain.

    Any ``Exception`` raised while resolving is swallowed on purpose
    (best-effort check); the page is then returned in whatever state it had
    reached and the WHOIS check is skipped.

    :param page: the link to check; may be mutated in place.
    :return: the result of ``check_whois(page)`` when both DNS lookups
        failed, otherwise the tuple ``(page.link, page.response_code)``.
    """
    real_response_code = ResponseCode.DNSError
    skip_whois_check = False
    try:
        root_result = LinkChecker.get_root_domain(page.link)
        root_domain = root_result[1]
        sub_domain = root_result[4]
        suffix = root_result[5]

        if not sub_domain or suffix not in TldUtility.TOP_TLD_LIST:
            skip_whois_check = True
        else:
            if LinkChecker.is_domain_DNS_OK(sub_domain):  # check DNS first
                real_response_code = ResponseCode.NoDNSError
                skip_whois_check = True
            elif not sub_domain.startswith("www."):
                # Second chance: the host may only resolve with a "www." prefix.
                if LinkChecker.is_domain_DNS_OK("www." + root_domain):
                    real_response_code = ResponseCode.NoDNSError
                    skip_whois_check = True

            page.response_code = real_response_code
            page.link_type = OnSiteLink.TypeOutbound
            page.link = root_domain
    except Exception:
        # Best-effort: a failed lookup must not abort the caller, it only
        # disables the WHOIS fallback for this page.
        skip_whois_check = True

    # The result is deliberately returned here and not from a ``finally``
    # clause: returning inside ``finally`` would also discard
    # KeyboardInterrupt / SystemExit, which must be allowed to propagate.
    if not skip_whois_check and real_response_code == ResponseCode.DNSError:
        return check_whois(page)
    return page.link, page.response_code
    def check_external_page(checker: SiteChecker, page: OnSiteLink, timeout=10):
        """
        Run a DNS-only check on an external link and queue the page if needed.

        Links whose sub-domain part is empty, or whose root domain is already
        in ``checker.external_cache_list``, are ignored. A new root domain is
        remembered in that list while it holds fewer than
        ``checker.external_cache_size`` entries. The page is then updated in
        place (``response_code``, ``link_type``, ``link``) and put on
        ``checker.output_queue`` when ``checker.output_all_external`` is set
        or ``ResponseCode.domain_might_be_expired`` accepts the response code.

        Any exception is printed and logged; it is never propagated.

        :param checker: supplies the external-domain cache, the output queue
            and the lock guarding that queue.
        :param page: the external link to check; mutated in place.
        :param timeout: not used by this implementation.
        :return: None
        """
        try:
            domain_parts = LinkChecker.get_root_domain(page.link)
            root_domain, sub_domain = domain_parts[1], domain_parts[4]

            # Nothing to do for links without a sub-domain or for domains seen before.
            if len(sub_domain) == 0 or root_domain in checker.external_cache_list:
                return
            if len(checker.external_cache_list) < checker.external_cache_size:
                checker.external_cache_list.append(root_domain)

            # Keep the page's current code unless one of the DNS lookups succeeds.
            status = page.response_code
            if LinkChecker.is_domain_DNS_OK(sub_domain) or (
                not sub_domain.startswith("www.")
                and LinkChecker.is_domain_DNS_OK("www." + root_domain)
            ):
                status = ResponseCode.NoDNSError

            page.response_code = status
            page.link_type = OnSiteLink.TypeOutbound
            page.link = root_domain

            wanted = checker.output_all_external or ResponseCode.domain_might_be_expired(status)
            if wanted and checker.output_queue is not None:
                with checker._queue_lock:
                    checker.output_queue.put(page)
        except Exception as err:
            PrintLogger.print(err)
            ErrorLogger.log_error("PageChecker", err, "check_external_page() " + page.link)