Beispiel #1
0
    def task_parse_page(self, grab, task):
        """Extract profile fields from a user page and append them to the worksheet.

        Pages that carry a non-empty ``profile_deleted`` marker are skipped.
        Otherwise the user id (taken from ``task``), the page name, the city
        and the languages are written to the next worksheet position and
        ``self.parsed`` is incremented.

        The handler is best-effort: a failure while extracting or writing is
        logged and not propagated to the caller.

        :param grab: object whose ``doc.select(xpath)`` queries the page.
        :param task: object providing ``user_id``.
        """
        import logging

        log = logging.getLogger(__name__)

        try:
            marker = grab.doc.select(
                '//*[@id="profile_info"]/h4/div[contains(@class, "profile_deleted")]')
            if marker.text():
                # Hidden/deleted profile: nothing to record.
                return
        except Exception:
            # Presumably raised when the marker is absent, i.e. the page is a
            # regular profile; keep going but leave a trace for debugging.
            log.debug('No deleted-profile marker found', exc_info=True)

        try:
            user_id = task.user_id
            username = grab.doc.select(
                '//*[@id="profile_info"]/h4/div[contains(@class, "page_name")]').text()
            # City and languages are optional: fall back to an empty selector
            # so that a missing entry does not raise.
            city = grab.doc.select(
                "//*[@id='profile_full_info']//div[contains(@class, 'clear_fix') and div[@class='label fl_l'] = 'Город:']/div[@class='labeled fl_l']/a"
            ).one(default=XpathSelector('')).text()
            languages = grab.doc.select(
                "//*[@id='profile_info']//div[contains(@class, 'clear_fix') and div[@class='label fl_l'] = 'Языки:']/div[@class='labeled fl_l']/a"
            ).one(default=XpathSelector('')).text()

            # NOTE(review): despite its name, cur_col is passed as the first
            # argument of ws.write; in xlwt-style worksheets that is the row
            # index - confirm against the worksheet library in use.
            self.cur_col += 1
            for position, value in enumerate((user_id, username, city, languages)):
                self.ws.write(self.cur_col, position, value)

            self.parsed += 1
        except Exception:
            # Stay best-effort, but make the failure visible instead of
            # silently dropping the page.
            log.exception('Failed to parse profile page for user %r',
                          getattr(task, 'user_id', None))
Beispiel #2
0
 def test_number(self):
     """``number()`` should yield an integer for an element and for an attribute.

     The expected values (4 and 6) depend on the ``self.tree`` fixture,
     which is built outside this method.
     """
     root = XpathSelector(self.tree)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(4, root.select('//ul/li[last()]').number())
     self.assertEqual(6, root.select('//ul/li[last()]/@id').number())
Beispiel #3
0
    def test_get_function(self):
        """Functions ``height1`` and ``height2`` should both yield 3 for the sample markup."""
        # The markup is intentionally left unclosed, as in the original test.
        html = '<html><body><height>3'
        for name in ('height1', 'height2'):
            func = Player.get_function(name)
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(3, func(XpathSelector(parse_html(html))))
Beispiel #4
0
    def test_incorrect_xpath(self):
        """A boolean-valued XPath must produce a selector list that does not exist.

        The lxml xpath function returns a boolean for expressions such as
        ``text() = "value"``. That used to break the selector's internal
        logic, which assumes only a list can be returned; this test was
        created when that was fixed.
        """
        sel = XpathSelector(self.tree).select('//ul/li/text()="oops"')
        self.assertEqual(False, sel.exists())

        # The selector list is always empty in this special case,
        # even if the XPath returns True at the lxml level.
        self.assertEqual(True, self.tree.xpath('//ul[1]/li[1]/text()="one"'))
        sel = XpathSelector(self.tree).select('//ul[1]/li[1]/text()="one"')
        self.assertEqual(False, sel.exists())
Beispiel #5
0
 def test_xpath_selector(self):
     """Instantiating ``XpathSelector`` should write a deprecation notice to stderr."""
     tree = fromstring('<div>test</div>')
     captured = StringIO()
     # Redirect stderr only for the duration of the construction.
     with mock.patch('sys.stderr', captured):
         XpathSelector(tree)
     # assertIn reports both operands on failure, unlike assertTrue(a in b).
     self.assertIn('using XpathSelector from deprecated', captured.getvalue())
Beispiel #6
0
 def _build_selector(cls, tree, selector_type):
     """Wrap ``tree`` in the selector class named by ``selector_type``.

     ``'xpath'`` gives an ``XpathSelector`` and ``'json'`` a ``JsonSelector``;
     any other value raises ``GrabMisuseError``.
     """
     if selector_type == 'xpath':
         return XpathSelector(tree)
     if selector_type == 'json':
         return JsonSelector(tree)
     message = 'Unknown selector type: %s' % selector_type
     raise GrabMisuseError(message)
Beispiel #7
0
 def test_number(self):
     """``number()`` on the fourth ``li`` should yield 4 (per the ``self.tree`` fixture)."""
     sel = XpathSelector(self.tree).select('//ul/li[4]')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(4, sel.number())
Beispiel #8
0
 def test_html(self):
     """``html()`` should serialise the wrapped ``h1`` node back to markup."""
     sel = XpathSelector(self.tree.xpath('//h1')[0])
     # Surrounding whitespace is stripped before comparing.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual('<h1>test</h1>', sel.html().strip())
Beispiel #9
0
 def test_attr_list(self):
     """``attr_list('class')`` should collect the class of every matched ``li``."""
     root = XpathSelector(self.tree)
     found = root.select('//ul[@id="second-list"]/li').attr_list('class')
     # Compared as sets, so the order of the matched nodes is not checked.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual({'li-1', 'li-2'}, set(found))
Beispiel #10
0
 def test_one(self):
     """``one()`` and ``text()`` on a selector list should both give 'one' here."""
     sel = XpathSelector(self.tree).select('//ul/li')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual('one', sel.one().node.text)
     self.assertEqual('one', sel.text())
Beispiel #11
0
 def test_select_select(self):
     """Chaining ``select`` should evaluate the second XPath relative to each match."""
     root = XpathSelector(self.tree)
     first_items = root.select('//ul').select('./li[1]')
     # Compared as sets, so the order of the matches is not checked.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         {'one', 'yet one'},
         {item.text() for item in first_items},
     )
Beispiel #12
0
 def test_text_list(self):
     """``text_list()`` should return the text of every matched node."""
     root = XpathSelector(self.tree)
     texts = root.select('//ul/li[1]').text_list()
     # Compared as a set, so the order of the matches is not checked.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual({'one', 'yet one'}, set(texts))
Beispiel #13
0
 def _selector(self):
     """Return an ``XpathSelector`` wrapping ``self._node``."""
     wrapped = XpathSelector(self._node)
     return wrapped
Beispiel #14
0
 def test_text_selector(self):
     """Selecting a ``text()`` node should produce a ``TextSelector`` instance."""
     sel = XpathSelector(self.tree).select('//li/text()').one()
     # assertIsInstance names the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(sel, TextSelector)
Beispiel #15
0
 def test_textselector(self):
     """``text()`` on a list of selected text nodes should give 'one' here."""
     sel = XpathSelector(self.tree).select('//li/text()')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual('one', sel.text())
Beispiel #16
0
 def select(self, *args, **kwargs):
     """Call ``select`` on an ``XpathSelector`` built from ``self.tree``.

     All positional and keyword arguments are forwarded unchanged.
     """
     root = XpathSelector(self.tree)
     return root.select(*args, **kwargs)
Beispiel #17
0
 def test_select_node(self):
     """Indexing a selector list should expose the underlying node via ``.node``."""
     first_heading = XpathSelector(self.tree).select('//h1')[0]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual('test', first_heading.node.text)
Beispiel #18
0
 def test_in_general(self):
     """Smoke test: building an ``XpathSelector`` from ``self.tree`` must not raise."""
     XpathSelector(self.tree)
Beispiel #19
0
    def test_exists(self):
        """``exists()`` should be True for a matching XPath and False otherwise."""
        root = XpathSelector(self.tree)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(True, root.select('//ul/li[4]').exists())
        self.assertEqual(False, root.select('//ul/li[5]').exists())