def test_response(link: str) -> bool:
    """Fetch *link* and report whether it answered with HTTP 200.

    Prints a one-line verdict (status code and content type) either way.
    """
    status_code, content_type = LinkChecker.get_response(link)
    ok = status_code == 200
    verdict = "status good:" if ok else "status bad:"
    print(link, verdict, status_code, " content: ", content_type)
    return ok
def scrape_web_res(self, page: LinkAttrs):
    """Fetch one resource described by *page* and persist it locally.

    Text resources (webpage/CSS/JS) are parsed via ``_parse_text_res`` and
    written out; everything else is downloaded verbatim.  Failures are
    logged to the error log and counted per resource type; the ``finally``
    block always bumps the per-type totals, success or not.

    NOTE(review): the exact nesting of the counter updates inside
    ``with self._sync_lock:`` is reconstructed from mangled formatting —
    confirm against the original file.
    """
    print("look:", page.link, "level: ", page.level)
    try:
        if len(page.path) > ArchiveExplorer.MAX_PATH_LEN:  # max file path in any file system
            raise OSError("file path is too long:" + page.path)
        response_code, content_type = LinkChecker.get_response(page.link)
        # Only OK / Found / Redirect responses are treated as reachable.
        if response_code not in [ResponseCode.LinkOK, ResponseCode.LinkFound, ResponseCode.LinkRedirect]:
            raise ConnectionError("res is not available: " + page.link)
        if page.res_type in [LinkUtility.EXT_WEBPAGE, LinkUtility.EXT_CSS, LinkUtility.EXT_JS]:  # parse a webpage
            # Textual resource: rewrite/parse its contents before saving.
            save_text = self._parse_text_res(page)
            self._file_manager.write_to_file(page.path, save_text)
        # elif page.res_type != LinkUtility.EXT_OTHER:  # TODO: download normal resources
        #     response = LinkChecker.get_common_web_resource(page.link)
        #     if page.res_type == LinkUtility.EXT_IMAGE or page.res_type == LinkUtility.EXT_FONT:
        #         self._downloader.write_to_file(page.path, response.content, mode="b")
        #     else:
        #         self._downloader.write_to_file(page.path, response.text, mode="t")
        else:
            # Binary/other resource: stream straight to disk.
            # response = LinkChecker.get_common_web_resource(page.link)
            # self._downloader.write_to_file(page.path, response.content, mode="b")
            self._file_manager.download_file(sub_path=page.path, url=page.link,
                                             timeout=self._timeout,
                                             redirect=self._max_redirect,
                                             retries=self._max_retries)
    except Exception as ex:
        # Record the failure and bump the matching broken-resource counter.
        print("exception:", ex)
        print("broken res:", page)
        with self._sync_lock:  # counters/log are shared across worker threads
            self._file_manager.write_to_error_log(page.to_tuple(str(ex)))
            if page.res_type == LinkUtility.EXT_WEBPAGE:
                self._broken_webpage_count += 1
            elif page.res_type == LinkUtility.EXT_CSS:
                self._broken_css_count += 1
            elif page.res_type == LinkUtility.EXT_IMAGE:
                self._broken_image_count += 1
            elif page.res_type == LinkUtility.EXT_JS:
                self._broken_js_count += 1
            else:
                self._broken_others_count += 1
            self._broken_res_list.append(page)
    finally:
        # Always account for the attempt, broken or not.
        with self._sync_lock:
            self._total_res_done += 1
            if page.res_type == LinkUtility.EXT_WEBPAGE:
                self._total_webpage_count += 1
            elif page.res_type == LinkUtility.EXT_CSS:
                self._total_css_count += 1
            elif page.res_type == LinkUtility.EXT_IMAGE:
                self._total_image_count += 1
            elif page.res_type == LinkUtility.EXT_JS:
                self._total_js_count += 1
            else:
                self._total_others_count += 1
def test_response(link: str) -> (bool, str):
    """Probe *link* and classify it.

    Returns a pair ``(reachable, link_cls)`` where ``reachable`` is True
    only for an HTTP 200 response and ``link_cls`` is the resource class
    derived from the link's extension (trailing slash stripped first).
    """
    status_code, _content_type = LinkChecker.get_response(link)
    link_cls, _ext = LinkUtility.get_link_class(link.rstrip('/'))
    return status_code == 200, link_cls
def scrape_web_res(self, page: LinkAttrs):
    """Download and store the single resource referenced by *page*.

    Webpage/CSS/JS resources go through ``_parse_text_res`` before being
    written; other resource types are downloaded directly.  Any exception
    is logged, the per-type broken counter is incremented, and the page is
    appended to the broken-resource list.  The ``finally`` clause updates
    the per-type completion totals regardless of outcome.

    NOTE(review): statement nesting under ``with self._sync_lock:`` was
    reconstructed from collapsed one-line source — verify it matches the
    original layout.
    """
    print("look:", page.link, "level: ", page.level)
    try:
        if len(page.path) > ArchiveExplorer.MAX_PATH_LEN:  # max file path in any file system
            raise OSError("file path is too long:" + page.path)
        response_code, content_type = LinkChecker.get_response(page.link)
        # Anything other than OK/Found/Redirect is considered unavailable.
        if response_code not in [ResponseCode.LinkOK, ResponseCode.LinkFound, ResponseCode.LinkRedirect]:
            raise ConnectionError("res is not available: " + page.link)
        if page.res_type in [LinkUtility.EXT_WEBPAGE, LinkUtility.EXT_CSS, LinkUtility.EXT_JS]:  # parse a webpage
            # Text resource: parse first, then persist the rewritten text.
            save_text = self._parse_text_res(page)
            self._file_manager.write_to_file(page.path, save_text)
        # elif page.res_type != LinkUtility.EXT_OTHER:  # TODO: download normal resources
        #     response = LinkChecker.get_common_web_resource(page.link)
        #     if page.res_type == LinkUtility.EXT_IMAGE or page.res_type == LinkUtility.EXT_FONT:
        #         self._downloader.write_to_file(page.path, response.content, mode="b")
        #     else:
        #         self._downloader.write_to_file(page.path, response.text, mode="t")
        else:
            # Non-text resource: delegate to the file manager's downloader.
            # response = LinkChecker.get_common_web_resource(page.link)
            # self._downloader.write_to_file(page.path, response.content, mode="b")
            self._file_manager.download_file(sub_path=page.path, url=page.link,
                                             timeout=self._timeout,
                                             redirect=self._max_redirect,
                                             retries=self._max_retries)
    except Exception as ex:
        # Log the failure and update the broken-resource bookkeeping.
        print("exception:", ex)
        print("broken res:", page)
        with self._sync_lock:  # shared state is mutated by multiple workers
            self._file_manager.write_to_error_log(page.to_tuple(str(ex)))
            if page.res_type == LinkUtility.EXT_WEBPAGE:
                self._broken_webpage_count += 1
            elif page.res_type == LinkUtility.EXT_CSS:
                self._broken_css_count += 1
            elif page.res_type == LinkUtility.EXT_IMAGE:
                self._broken_image_count += 1
            elif page.res_type == LinkUtility.EXT_JS:
                self._broken_js_count += 1
            else:
                self._broken_others_count += 1
            self._broken_res_list.append(page)
    finally:
        # Completion totals are updated for every attempt.
        with self._sync_lock:
            self._total_res_done += 1
            if page.res_type == LinkUtility.EXT_WEBPAGE:
                self._total_webpage_count += 1
            elif page.res_type == LinkUtility.EXT_CSS:
                self._total_css_count += 1
            elif page.res_type == LinkUtility.EXT_IMAGE:
                self._total_image_count += 1
            elif page.res_type == LinkUtility.EXT_JS:
                self._total_js_count += 1
            else:
                self._total_others_count += 1