Python Parser 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: hydra

클래스/타입: Parser

hotexamples.com에서의 예제들: 2

Python Parser - 예제 2개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python hydra.Parser의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Parser(1)

feed_me(1)

자주 사용되는 메소드들

Parser (1)

feed_me (1)

예제 #1

파일 보기

class TestCases(unittest.TestCase):
    """Tests for ``Parser.feed_me`` and for ``Checker`` domain/queue handling."""

    def setUp(self):
        """Load the HTML fixture and build a fresh ``Checker`` and ``Parser``.

        The fixture file is closed through ``addCleanup`` rather than
        ``tearDown``: unittest skips ``tearDown`` when ``setUp`` raises, so a
        failure in any line after ``open`` would otherwise leak the handle.
        """
        self.testfile = open(HTMLDATA)
        # Register the close immediately so it runs even if setUp fails below.
        self.addCleanup(self.testfile.close)
        self.data = self.testfile.read()
        self.url = "https://example.com"
        self.check = Checker(self.url)
        self.parser = Parser()

    def test_parser_expected_output(self):
        """``feed_me`` returns the expected links, in this exact order."""
        links = self.parser.feed_me(self.data)
        expected_output = [
            "style.css",
            "scripts.js",
            "http://baddomain.com/i-donut-exist",
            "image.png",
            "www.anotherbaddomain.com/multithreading-is-fun",
            "https://example.com/i-have-links",
            "https://example.com",
        ]
        self.assertEqual(links, expected_output)

    def test_domain_extraction(self):
        """``extract_domain`` reduces the base URL to its bare domain."""
        self.assertEqual(self.check.extract_domain(self.url), "example.com")

    def test_process_queue_length(self):
        """Parsing a page twice must not re-queue a link marked as visited."""
        self.pagedata = {
            "url": "https://example.com/test-page.html",
            "parent": "https://example.com/test-page.html",
            "data":
            '<!DOCTYPE html>\n<html>\n\n    <head>\n        <title>Test Data Page</title>\n\n        <meta charset="utf-8">\n        <meta http-equiv="Content-type" content="text/html; charset=UTF-8">\n        <meta name="viewport" content="width=device-width, initial-scale=1">\n        <link rel="stylesheet" href="style.css" type="text/css">\n        <script type="text/javascript" src="scripts.js"></script>\n    </head>\n\n    <body>\n        <div>\n            <h1>Test Data Page</h1>\n            <p>This page does not exist: <a href="/i-donut-exist">Whale</a></p>\n            <p>This is not a link: <a>No Spoon</a></p>\n            <img src="image.png" />\n            <p>This page does not exist: <a href="/multithreading-is-fun">Petunias</a></p>\n            <p>This page contains more links: <a href="/i-have-links">Crawl Me</a></p>\n            <p>This domain is for use in illustrative examples in documents. You may use this\n                domain in literature without prior coordination or asking for permission: <a\n                    href="https://example.com">Example</a></p>\n        </div>\n\n\n    </body>\n\n</html>',
            "valid_content_type": True,
        }
        # There are 7 links in pagedata["data"]
        first_parse = 7
        self.check.parse_page(self.pagedata)
        self.assertEqual(len(self.check.TO_PROCESS.queue), first_parse)
        self.check.visited.add("https://example.com/style.css")
        # Checker should add to queue all but the one visited link:
        # the 7 entries already queued plus 6 new ones.
        second_parse = 13
        self.check.parse_page(self.pagedata)
        self.assertEqual(len(self.check.TO_PROCESS.queue), second_parse)

예제 #2

파일 보기

 def setUp(self):
     """Load the HTML fixture and build a ``Checker`` and ``Parser`` from a default ``Config``.

     The file close is registered with ``addCleanup`` so the handle is
     released even when a later line of ``setUp`` raises (unittest does not
     run ``tearDown`` in that case). Closing an already-closed file is a
     no-op, so this stays safe if a ``tearDown`` also closes it.
     """
     self.testfile = open(HTMLDATA)
     # Register the close immediately so it runs even if setUp fails below.
     self.addCleanup(self.testfile.close)
     self.data = self.testfile.read()
     self.url = "https://example.com"
     self.check = Checker(self.url, Config())
     self.parser = Parser(Config())