Example #1
0
 def test_get_elements_from_page(self, xpath, expected):
     self.page_spider = PageSpider()
     self.web_spider = WebSpider('http://sh.meituan.com', url_generator,
                                 {xpath: XpathCaller()}, WebCaller())
     result = self.web_spider.get_into_new_page(
         page_spider=self.page_spider)
     assert_equal(result, expected)
Example #2
0
class TestElements(unittest.TestCase):
    def setUp(self):
        self.spider = PageSpider()
        self.spider.spider_page('http://sh.meituan.com')

    @parameterized.expand([
        ('//h1/a', ([[
            '<a class="site-logo" gaevent="header/logo" href="http://sh.meituan.com">上海团购</a>'
        ]], [], [])),
        ('/h1/a', ([[
            '<a class="site-logo" gaevent="header/logo" href="http://sh.meituan.com">上海团购</a>'
        ]], [], [])), ('//h1/a/text()', ([], [['上海团购']], [])),
        ('/h1/a/@', ([], [], [[{
            'class': ['site-logo'],
            'href': 'http://sh.meituan.com',
            'gaevent': 'header/logo'
        }]])), ('//h1/a/@href', ([], [], [[{
            'href': 'http://sh.meituan.com'
        }]])),
        ('//h1/a/@', ([], [], [[{
            'class': ['site-logo'],
            'href': 'http://sh.meituan.com',
            'gaevent': 'header/logo'
        }]])),
        ('/div[@class="J-nav-item"]/dl/dt/a[@class="nav-level1__label",@hidefocus="true"]/@href',
         ([], [], [[{
             'href': 'http://sh.meituan.com/category/meishi'
         }]])),
        ('/div[@class="J-nav-item"]/dl/dt/a[@class="nav-level1__label",@hidefocus="true"]/text()',
         ([], [['美食']], []))
    ])
    def test_get_specified_elements(self, xpath, expected):
        self.spider.parse_xpath_of_tag(xpath)
        self.spider.find_tag(self.spider._soup, *tuple(self.spider.xpath))
        self.spider.get_specified_elements()
        assert_equal(
            (self.spider.contexts, self.spider.text, self.spider.attributes),
            expected)
Example #3
0
class TestTags(unittest.TestCase):
    def setUp(self):
        self.spider = PageSpider()
        self.spider.spider_page('http://sh.meituan.com')

    @parameterized.expand([
        ('//h1/a', 1), ('/h1/a', 1), ('//h1/a/text()', 1), ('/h1/a/@', 1),
        ('//h1/a/@href', 1), ('//h1/a/@', 1),
        ('//div[@class="J-nav-item"]/dl/dt/a[@class="nav-level1__label",@hidefocus="true"]/@href',
         9),
        ('/div[@class="J-nav-item"]/dl/dt/a[@class="nav-level1__label",@hidefocus="true"]/text()',
         1)
    ])
    def test_find_tag(self, xpath, expected):
        self.spider.parse_xpath_of_tag(xpath)
        self.spider.find_tag(self.spider._soup, *tuple(self.spider.xpath))
        assert_equal(len(self.spider.tags), expected)
Example #4
0
 def setUp(self):
     self.spider = PageSpider()
     self.spider.spider_page('http://sh.meituan.com')
Example #5
0
class TestXpath(unittest.TestCase):
    def setUp(self):
        self.spider = PageSpider()
        self.spider.spider_page('http://sh.meituan.com')

    @parameterized.expand([
        ('//h1/a', ([{
            'h1': {}
        }, {
            'a': {}
        }], '', True)), ('/h1/a', ([{
            'h1': {}
        }, {
            'a': {}
        }], '', False)),
        ('//h1/a/text()', ([{
            'h1': {}
        }, {
            'a': {}
        }], 'text()', True)), ('/h1/a/@', ([{
            'h1': {}
        }, {
            'a': {}
        }], '@', False)),
        ('//h1/a/@href', ([{
            'h1': {}
        }, {
            'a': {}
        }], '@href', True)), ('//h1/a/@', ([{
            'h1': {}
        }, {
            'a': {}
        }], '@', True)),
        ('/div[@class="J-nav-item"]/dl/dt/a[@class="nav-level1__label",@hidefocus="true"]/@href',
         ([{
             'div': {
                 'class': 'J-nav-item'
             }
         }, {
             'dl': {}
         }, {
             'dt': {}
         }, {
             'a': {
                 'class': "nav-level1__label",
                 'hidefocus': 'true'
             }
         }], '@href', False)),
        ('/div[@class="J-nav-item"]/dl/dt/a[@class="nav-level1__label",@hidefocus="true"]/text()',
         ([{
             'div': {
                 'class': 'J-nav-item'
             }
         }, {
             'dl': {}
         }, {
             'dt': {}
         }, {
             'a': {
                 'class': "nav-level1__label",
                 'hidefocus': 'true'
             }
         }], 'text()', False))
    ])
    def test_parse_xpath_of_tag(self, xpath, expected):
        self.spider.parse_xpath_of_tag(xpath)
        print(self.spider.xpath)
        print(self.spider.specified_contents)
        print(self.spider._flag_all)
        assert_equal((self.spider.xpath, self.spider.specified_contents,
                      self.spider._flag_all), expected)
Example #6
0
def test_spider_page():
    page_spider = PageSpider()
    assert_is_instance(page_spider.spider_page('http://sh.meituan.com'),
                       BeautifulSoup,
                       msg='No BeautifulSoup instance got')
Example #7
0
def test_request_page():
    assert_is_not_none(
        PageSpider.request_page('http://sh.meituan.com')['body'],
        msg='No response got')