예제 #1
0
    def test_get_function(self):
        """Check that ``Player.get_function`` returns a working extractor.

        The functions registered as ``height1`` and ``height2`` are looked
        up in turn, and each one is expected to return ``3`` when called
        with a selector built from a document containing ``<height>3``.
        """
        html = '<html><body><height>3'
        for name in ('height1', 'height2'):
            func = Player.get_function(name)
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists on TestCase in Python 3.12+.
            self.assertEqual(3, func(XpathSelector(parse_html(html))))
예제 #2
0
    def set_input_by_number(self, number, value):
        """
        Assign a value to a text input picked by its position in the form.

        :param number: index into the ``input[@type="text"]`` elements
            found under the current form
        :param value: value to assign to the chosen element
        :returns: the result of ``set_input`` called with the element's
            ``name`` attribute
        """

        text_inputs = XpathSelector(self.form).select(
            './/input[@type="text"]')
        target = text_inputs[number].node()
        field_name = target.get('name')
        return self.set_input(field_name, value)
예제 #3
0
    def set_input_by_number(self, number, value):
        """
        Assign a value to a text input picked by its position in the form.

        :param number: index into the ``input[@type="text"]`` elements
            found under the current form
        :param value: value to assign to the chosen element
        :returns: the result of ``set_input`` called with the element's
            ``name`` attribute
        """

        text_inputs = XpathSelector(self.form).select(
            './/input[@type="text"]')
        target = text_inputs[number].node()
        field_name = target.get('name')
        return self.set_input(field_name, value)
예제 #4
0
    def set_input_by_id(self, _id, value):
        """
        Assign a value to the form element whose ``id`` attribute is `_id`.

        When ``self._lxml_form`` is None, ``choose_form_by_element`` is
        called with the element's XPath before the element is looked up.

        :param _id: id of element
        :param value: value to assign to the element
        :returns: the result of ``set_input`` called with the element's
            ``name`` attribute
        """

        query = './/*[@id="%s"]' % _id
        if self._lxml_form is None:
            self.choose_form_by_element(query)
        node = XpathSelector(self.form).select(query).node()
        field_name = node.get('name')
        return self.set_input(field_name, value)
예제 #5
0
    def set_input_by_id(self, _id, value):
        """
        Assign a value to the form element whose ``id`` attribute is `_id`.

        When ``self._lxml_form`` is None, ``choose_form_by_element`` is
        called with the element's XPath before the element is looked up.

        :param _id: id of element
        :param value: value to assign to the element
        :returns: the result of ``set_input`` called with the element's
            ``name`` attribute
        """

        query = './/*[@id="%s"]' % _id
        if self._lxml_form is None:
            self.choose_form_by_element(query)
        node = XpathSelector(self.form).select(query).node()
        field_name = node.get('name')
        return self.set_input(field_name, value)
예제 #6
0
    def task_article(self, grab, task):
        """Collect article fields from ``grab.doc`` and save them.

        Header, favourites count, score, comments count, author and
        publication date are each read as the text of one matching node;
        when nothing matches, a fallback selector wrapping a placeholder
        string is used instead. The values are handed to ``save_result``
        together with ``task.current_page`` and ``task.url``.
        """

        def first_text(xpath, fallback):
            # Text of the single match, or of the fallback placeholder.
            return grab.doc.select(xpath).one(
                default=XpathSelector(fallback)).text()

        header = first_text("//span[@class='post_title']", 'No Header!')
        favorites = first_text("//*[@class='favs_count']", '0')
        score = first_text(
            "//div[@class='infopanel_wrapper']//div[contains(@class, 'mark')]/span[@class='score']",
            '0')
        comments = first_text("//*[@id='comments_count']", '0')
        author = first_text("//div[@class='author']/a", 'No Author!')
        published = first_text("//div[@class='published']", '0')

        record = {
            'page': task.current_page,
            'url': task.url,
            'header': header,
            'author': author,
            'favorites': favorites,
            'score': score,
            'comments': comments,
            'date': published
        }
        self.save_result(data=record)
예제 #7
0
def main(**kwargs):
    """Time 500 parse-and-query rounds for two ways of reading ``<title>``.

    The file ``data/awesome_python.html`` is read once. It is then parsed
    with ``fromstring`` 500 times per approach: first querying the tree's
    own ``xpath`` method, then going through ``XpathSelector.select``. The
    elapsed wall-clock time of each loop is printed.

    :param kwargs: accepted but not used by this function
    """
    # A context manager closes the file handle as soon as the data is read,
    # instead of leaving it to the garbage collector.
    with open('data/awesome_python.html', 'rb') as inp:
        data = inp.read()

    start = time.time()
    for _ in range(500):
        tree = fromstring(data)
        assert tree.xpath('//title')[0].text.startswith('awesome-web-scraping')
    print('lxml:xpath %.2f' % (time.time() - start))

    start = time.time()
    for _ in range(500):
        tree = fromstring(data)
        assert XpathSelector(tree).select('//title').text().startswith('awesome-web-scraping')
    print('selection:select %.2f' % (time.time() - start))
예제 #8
0
    def get_prices(self, grab, subject):
        """Parse the list of extras that follows the heading for `subject`.

        The document is searched for an ``<h3 class="h6 copy-sp-m">``
        heading containing `subject`; the markup between that heading and
        the next ``</ul>`` is parsed, and every ``li`` with class
        ``list__item u-cf`` becomes one entry.

        :param grab: object whose ``doc`` provides ``rex_text`` and ``url``
        :param subject: text interpolated unescaped into the heading regex
        :returns: list of ``OrderedDict`` entries with keys ``name``,
            ``value`` (price text without its first character and without
            commas), optionally ``perweek`` or ``perday`` set to True, and
            ``currency`` (first character of the price text); ``None`` when
            the heading is not found or no items were parsed
        """
        try:
            # Raw string: ``\=`` in a normal literal is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            extras = grab.doc.rex_text(
                r'<h3 class\="h6 copy-sp-m">.*?%s.*?</h3>(.+?)</ul>' % subject,
                flags=re.S)
        except DataNotFound:
            logging.debug("Price %s is not found on %s",
                          subject, grab.doc.url)
            return None

        sel = XpathSelector(fromstring(extras))
        prices = []
        for li in sel.select('//li[@class="list__item u-cf"]'):
            obligatory = OrderedDict()
            obligatory['name'] = li.select('node()').text()
            money = li.select('node()/strong').text()
            # The first character is the currency sign, the rest the amount.
            obligatory['value'] = money[1:].replace(',', '')

            # Find perweek or perday
            if li.select('span[@class="boatview__extras-amount"'
                         ' and contains(text(),"per week")]').exists():
                obligatory['perweek'] = True
            elif li.select('span[@class="boatview__extras-amount"'
                           ' and contains(text(),"per day")]').exists():
                obligatory['perday'] = True
            obligatory['currency'] = money[0]
            prices.append(obligatory)

        if not prices:
            logging.debug("Price %s contains less than one element on: %s",
                          subject, grab.doc.url)
            return None

        return prices
예제 #9
0
 def xpath(self, query):
     """Run the XPath `query` against the tree returned by ``self.dom()``."""
     selector = XpathSelector(self.dom())
     return selector.select(query)
예제 #10
0
 def select(self, *args, **kwargs):
     """Forward all arguments to ``select`` of a selector over ``self.tree``."""
     selector = XpathSelector(self.tree)
     return selector.select(*args, **kwargs)
예제 #11
0
 def select(self, xpath):
     """Evaluate `xpath` with a selector built over ``self.dom_tree``."""
     return XpathSelector(self.dom_tree).select(xpath)