def _check_whois_v1(self, domain_data: OnSiteLink):
     """Run the v1 whois check and queue the domain only if it is expired.

     The link is reduced to its root domain when it starts with "http".
     ``LinkChecker.check_whois`` is then consulted: a truthy element 0 of
     its result replaces the response code with ``Expired`` (element 2
     truthy) or ``MightBeExpired`` (element 2 falsy).  Only a final code
     equal to ``ResponseCode.Expired`` causes ``domain_data`` to be updated
     in place and a ``(link, response_code)`` tuple to be put on
     ``self._output_q`` under ``self._queue_lock``.

     :param domain_data: link record to check; mutated on an expired result.
     :return: None. ``self._add_job_done_one()`` is always called on exit.
     """
     root_domain = domain_data.link
     try:
         if root_domain.startswith("http"):
             root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
         # Start from the code the record already carries; whois may override it.
         response_code = domain_data.response_code
         whois_record = LinkChecker.check_whois(root_domain)
         if whois_record[0]:
             response_code = (ResponseCode.Expired if whois_record[2]
                              else ResponseCode.MightBeExpired)
         if response_code == ResponseCode.Expired:
             domain_data.link = root_domain
             domain_data.response_code = response_code
             result = (domain_data.link, domain_data.response_code)
             with self._queue_lock:
                 self._output_q.put(result)
     except Exception as ex:
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex,
                               "_check_whois() " + root_domain)
     finally:
         # Count the job as finished whether or not the check succeeded.
         self._add_job_done_one()
 def _check_whois(self, domain_data: OnSiteLink):
     """Check availability of a domain via whois and queue positive results.

     In debug mode (``self._is_debug``) the record is passed straight to
     ``self._put_output_result_in_queue`` without any lookup.  Otherwise the
     lower-cased link is reduced to its root domain when it starts with
     "http", and ``LinkChecker.is_domain_available_whois`` is queried.  When
     the domain is available or in redemption, ``domain_data`` is updated
     in place (``Expired`` for available, ``MightBeExpired`` otherwise) and
     queued; any other result is dropped.

     :param domain_data: link record to check; mutated on a positive result.
     :return: None. ``self._add_job_done_one()`` is always called on exit.
     """
     root_domain = domain_data.link.lower()
     try:
         if self._is_debug:
             # Debug runs skip the whois lookup and echo the input back.
             self._put_output_result_in_queue(domain_data)
             return
         if root_domain.startswith("http"):
             root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
         available, redemption = LinkChecker.is_domain_available_whois(
             root_domain)
         if available or redemption:
             new_code = (ResponseCode.Expired if available
                         else ResponseCode.MightBeExpired)
             domain_data.link = root_domain
             domain_data.response_code = new_code
             self._put_output_result_in_queue(domain_data)
     except Exception as ex:
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex,
                               "_check_whois() " + root_domain)
     finally:
         # Count the job as finished whether or not the check succeeded.
         self._add_job_done_one()
def check_whois_with_dns(page: OnSiteLink):
    """Check a page's domain by DNS first, using whois only if DNS fails.

    ``page.link`` is split with ``LinkChecker.get_root_domain`` (elements 1,
    4 and 5 of the result are used as root domain, sub-domain and suffix).
    When the sub-domain is empty or the suffix is not in
    ``TldUtility.TOP_TLD_LIST`` nothing is checked and ``page`` is left
    untouched.  Otherwise DNS is tried for the sub-domain and, if that
    fails and the sub-domain does not start with "www.", for
    ``"www." + root_domain``.  ``page`` is then updated in place
    (``response_code``, ``link_type = OnSiteLink.TypeOutbound``,
    ``link = root_domain``).  ``check_whois`` is consulted only when both
    DNS attempts failed.

    :param page: link record to check; mutated in place.
    :return: the result of ``check_whois(page)`` when whois was consulted,
        otherwise the tuple ``(page.link, page.response_code)``.
    """
    real_response_code = ResponseCode.DNSError
    skip_whois_check = False
    try:
        root_result = LinkChecker.get_root_domain(page.link)
        root_domain = root_result[1]
        sub_domain = root_result[4]
        suffix = root_result[5]

        if not sub_domain or suffix not in TldUtility.TOP_TLD_LIST:
            skip_whois_check = True
        else:
            dns_ok = LinkChecker.is_domain_DNS_OK(sub_domain)  # check DNS first
            if not dns_ok and not sub_domain.startswith("www."):
                # Retry with the conventional "www." host before giving up.
                dns_ok = LinkChecker.is_domain_DNS_OK("www." + root_domain)
            if dns_ok:
                real_response_code = ResponseCode.NoDNSError
                skip_whois_check = True

            page.response_code = real_response_code
            page.link_type = OnSiteLink.TypeOutbound
            page.link = root_domain
    except Exception:
        # Deliberately best-effort: any failure while parsing or resolving
        # means the whois lookup is skipped and the page is returned as it
        # currently stands.
        skip_whois_check = True

    # The returns live after the try statement rather than in a ``finally``
    # clause: returning from ``finally`` would silently discard
    # KeyboardInterrupt/SystemExit raised during the checks.
    if not skip_whois_check and real_response_code == ResponseCode.DNSError:
        return check_whois(page)
    return page.link, page.response_code
Example #4
0
def check_whois_with_dns(page: OnSiteLink):
    """Check a page's domain by DNS first, using whois only if DNS fails.

    ``page.link`` is split with ``LinkChecker.get_root_domain`` (elements 1,
    4 and 5 of the result are used as root domain, sub-domain and suffix).
    When the sub-domain is empty or the suffix is not in
    ``TldUtility.TOP_TLD_LIST`` nothing is checked and ``page`` is left
    untouched.  Otherwise DNS is tried for the sub-domain and, if that
    fails and the sub-domain does not start with "www.", for
    ``"www." + root_domain``.  ``page`` is then updated in place
    (``response_code``, ``link_type = OnSiteLink.TypeOutbound``,
    ``link = root_domain``).  ``check_whois`` is consulted only when both
    DNS attempts failed.

    :param page: link record to check; mutated in place.
    :return: the result of ``check_whois(page)`` when whois was consulted,
        otherwise the tuple ``(page.link, page.response_code)``.
    """
    real_response_code = ResponseCode.DNSError
    skip_whois_check = False
    try:
        root_result = LinkChecker.get_root_domain(page.link)
        root_domain = root_result[1]
        sub_domain = root_result[4]
        suffix = root_result[5]

        if not sub_domain or suffix not in TldUtility.TOP_TLD_LIST:
            skip_whois_check = True
        else:
            dns_ok = LinkChecker.is_domain_DNS_OK(sub_domain)  # check DNS first
            if not dns_ok and not sub_domain.startswith("www."):
                # Retry with the conventional "www." host before giving up.
                dns_ok = LinkChecker.is_domain_DNS_OK("www." + root_domain)
            if dns_ok:
                real_response_code = ResponseCode.NoDNSError
                skip_whois_check = True

            page.response_code = real_response_code
            page.link_type = OnSiteLink.TypeOutbound
            page.link = root_domain
    except Exception:
        # Deliberately best-effort: any failure while parsing or resolving
        # means the whois lookup is skipped and the page is returned as it
        # currently stands.
        skip_whois_check = True

    # The returns live after the try statement rather than in a ``finally``
    # clause: returning from ``finally`` would silently discard
    # KeyboardInterrupt/SystemExit raised during the checks.
    if not skip_whois_check and real_response_code == ResponseCode.DNSError:
        return check_whois(page)
    return page.link, page.response_code
    def check_external_page(checker: SiteChecker, page: OnSiteLink, timeout=10):
        """
        Resolve an external page's domain via DNS and queue it when relevant.

        Pages whose sub-domain is empty, or whose root domain is already in
        ``checker.external_cache_list``, are ignored.  New root domains are
        remembered while the cache holds fewer than
        ``checker.external_cache_size`` entries.  The page is then rewritten
        in place (response code, outbound link type, root domain as link)
        and put on ``checker.output_queue`` when the checker outputs every
        external link or the code may indicate an expired domain.

        :param checker: site checker providing the cache, lock and output queue
        :param page: external link to check; mutated in place
        :param timeout: not used by this implementation
        :return: None
        """
        try:
            root_result = LinkChecker.get_root_domain(page.link)
            root_domain = root_result[1]
            sub_domain = root_result[4]

            seen_domains = checker.external_cache_list
            if len(sub_domain) == 0 or root_domain in seen_domains:
                return
            if len(checker.external_cache_list) < checker.external_cache_size:
                checker.external_cache_list.append(root_domain)

            response_code = page.response_code
            # DNS is tried on the sub-domain first, then on the "www." host
            # unless the sub-domain already starts with it.
            if LinkChecker.is_domain_DNS_OK(sub_domain) or (
                    not sub_domain.startswith("www.")
                    and LinkChecker.is_domain_DNS_OK("www." + root_domain)):
                response_code = ResponseCode.NoDNSError

            page.response_code = response_code
            page.link_type = OnSiteLink.TypeOutbound
            page.link = root_domain

            wanted = (checker.output_all_external
                      or ResponseCode.domain_might_be_expired(response_code))
            if wanted and checker.output_queue is not None:
                with checker._queue_lock:
                    checker.output_queue.put(page)
        except Exception as ex:
            PrintLogger.print(ex)
            ErrorLogger.log_error("PageChecker", ex, "check_external_page() " + page.link)
def check_whois(domain_data: OnSiteLink):
    """Query whois for a domain and flag it as expired when appropriate.

    The lower-cased link is reduced to its root domain when it starts with
    "http", then ``LinkChecker.is_domain_available_whois`` is consulted.
    If the domain is available or in redemption, ``domain_data`` is updated
    in place: ``link`` becomes the root domain and ``response_code`` becomes
    ``ResponseCode.Expired`` (available) or ``ResponseCode.MightBeExpired``
    (redemption only).  Errors raised during the lookup are printed and
    otherwise ignored.

    :param domain_data: link record to check; mutated on a positive result.
    :return: tuple ``(domain_data.link, domain_data.response_code)``.
    """
    root_domain = domain_data.link.lower()
    try:
        if root_domain.startswith("http"):
            root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
        is_available, is_redemption = LinkChecker.is_domain_available_whois(root_domain)  # check whois record
        if is_available or is_redemption:
            real_response_code = (ResponseCode.Expired if is_available
                                  else ResponseCode.MightBeExpired)
            domain_data.link = root_domain
            domain_data.response_code = real_response_code
    except Exception as ex:
        # Best-effort lookup: report the failure and fall through to return
        # the record as it currently stands.
        print(ex)
    # Returning after the try statement (not from ``finally``) lets
    # KeyboardInterrupt/SystemExit propagate instead of being discarded.
    return domain_data.link, domain_data.response_code
Example #7
0
def check_whois(domain_data: OnSiteLink):
    """Query whois for a domain and flag it as expired when appropriate.

    The lower-cased link is reduced to its root domain when it starts with
    "http", then ``LinkChecker.is_domain_available_whois`` is consulted.
    If the domain is available or in redemption, ``domain_data`` is updated
    in place: ``link`` becomes the root domain and ``response_code`` becomes
    ``ResponseCode.Expired`` (available) or ``ResponseCode.MightBeExpired``
    (redemption only).  Errors raised during the lookup are printed and
    otherwise ignored.

    :param domain_data: link record to check; mutated on a positive result.
    :return: tuple ``(domain_data.link, domain_data.response_code)``.
    """
    root_domain = domain_data.link.lower()
    try:
        if root_domain.startswith("http"):
            root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
        is_available, is_redemption = LinkChecker.is_domain_available_whois(
            root_domain)  # check whois record
        if is_available or is_redemption:
            real_response_code = (ResponseCode.Expired if is_available
                                  else ResponseCode.MightBeExpired)
            domain_data.link = root_domain
            domain_data.response_code = real_response_code
    except Exception as ex:
        # Best-effort lookup: report the failure and fall through to return
        # the record as it currently stands.
        print(ex)
    # Returning after the try statement (not from ``finally``) lets
    # KeyboardInterrupt/SystemExit propagate instead of being discarded.
    return domain_data.link, domain_data.response_code
 def _check_whois(self, domain_data: OnSiteLink):
     """Check availability of a domain via whois and queue positive results.

     In debug mode (``self._is_debug``) the record is passed straight to
     ``self._put_output_result_in_queue`` without any lookup.  Otherwise the
     lower-cased link is reduced to its root domain when it starts with
     "http", and ``LinkChecker.is_domain_available_whois`` is queried.  When
     the domain is available or in redemption, ``domain_data`` is updated
     in place (``Expired`` for available, ``MightBeExpired`` otherwise) and
     queued; any other result is dropped.

     :param domain_data: link record to check; mutated on a positive result.
     :return: None. ``self._add_job_done_one()`` is always called on exit.
     """
     root_domain = domain_data.link.lower()
     try:
         if self._is_debug:
             # Debug runs skip the whois lookup and echo the input back.
             self._put_output_result_in_queue(domain_data)
             return
         if root_domain.startswith("http"):
             root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
         available, redemption = LinkChecker.is_domain_available_whois(root_domain)
         if available or redemption:
             new_code = (ResponseCode.Expired if available
                         else ResponseCode.MightBeExpired)
             domain_data.link = root_domain
             domain_data.response_code = new_code
             self._put_output_result_in_queue(domain_data)
     except Exception as ex:
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex, "_check_whois() " + root_domain)
     finally:
         # Count the job as finished whether or not the check succeeded.
         self._add_job_done_one()
 def _check_whois_v1(self, domain_data: OnSiteLink):
     """Run the v1 whois check and queue the domain only if it is expired.

     The link is reduced to its root domain when it starts with "http".
     ``LinkChecker.check_whois`` is then consulted: a truthy element 0 of
     its result replaces the response code with ``Expired`` (element 2
     truthy) or ``MightBeExpired`` (element 2 falsy).  Only a final code
     equal to ``ResponseCode.Expired`` causes ``domain_data`` to be updated
     in place and a ``(link, response_code)`` tuple to be put on
     ``self._output_q`` under ``self._queue_lock``.

     :param domain_data: link record to check; mutated on an expired result.
     :return: None. ``self._add_job_done_one()`` is always called on exit.
     """
     root_domain = domain_data.link
     try:
         if root_domain.startswith("http"):
             root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
         # Start from the code the record already carries; whois may override it.
         response_code = domain_data.response_code
         whois_record = LinkChecker.check_whois(root_domain)
         if whois_record[0]:
             response_code = (ResponseCode.Expired if whois_record[2]
                              else ResponseCode.MightBeExpired)
         if response_code == ResponseCode.Expired:
             domain_data.link = root_domain
             domain_data.response_code = response_code
             result = (domain_data.link, domain_data.response_code)
             with self._queue_lock:
                 self._output_q.put(result)
     except Exception as ex:
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex, "_check_whois() " + root_domain)
     finally:
         # Count the job as finished whether or not the check succeeded.
         self._add_job_done_one()