Ejemplo n.º 1
0
def parse_index_size(grab):
    """
    Return the number of results reported by a google search page.

    `grab` must already hold the fetched results page.  Zero is returned
    when the page states that nothing matched, and also (after logging
    an error) when none of the known counter elements is present.
    """

    if grab.search(u'did not match any documents'):
        return 0

    stats = None
    # Both layouts are probed; when both are present the xpath one wins
    # because it is assigned last.
    if len(grab.css_list('#resultStats')) > 0:
        stats = grab.css_text('#resultStats')
    if len(grab.xpath_list('//div[@id="subform_ctrl"]/div[2]')) > 0:
        stats = grab.xpath_text('//div[@id="subform_ctrl"]/div[2]')

    if stats is None:
        logging.error('Unknown google page format')
        return 0

    # Drop thousands separators so the counter is one run of digits.
    stats = stats.replace(',', '').replace('.', '')

    # Prefer the number after "about", then the one after "of"; without
    # either marker the whole text is searched.
    for marker in ('about', 'of'):
        if marker in stats:
            return int(find_number(stats.split(marker)[1]))
    return int(find_number(stats))
Ejemplo n.º 2
0
def parse_index_size(grab):
    """
    Read the total result count from a google search results page.

    `grab` is an instance that has already received the page.  The
    function returns 0 both for the "did not match any documents" page
    and for a page where no known counter element can be located (the
    second case is logged as an error).
    """

    if grab.search(u'did not match any documents'):
        return 0

    summary = None
    # The xpath layout is checked last, so it overrides the css one when
    # a page happens to contain both.
    if len(grab.css_list('#resultStats')) != 0:
        summary = grab.css_text('#resultStats')
    if len(grab.xpath_list('//div[@id="subform_ctrl"]/div[2]')) != 0:
        summary = grab.xpath_text('//div[@id="subform_ctrl"]/div[2]')

    if summary is None:
        logging.error('Unknown google page format')
        return 0

    # Remove "," and "." so digit groups collapse into a single number.
    summary = summary.replace(',', '').replace('.', '')

    # Choose the piece of text in which the counter is searched for.
    if 'about' in summary:
        fragment = summary.split('about')[1]
    elif 'of' in summary:
        fragment = summary.split('of')[1]
    else:
        fragment = summary
    return int(find_number(fragment))
Ejemplo n.º 3
0
def parse_index_size(grab):
    """
    Extract the result counter from a google search results page.

    `grab` has to contain the downloaded page already.  Returns 0 if the
    page reports no matching documents, or if neither of the supported
    counter elements exists (an error is logged in that case).
    """

    if grab.search(u"did not match any documents"):
        return 0

    counter_text = None
    # Checked in this order on purpose: a match of the xpath layout
    # replaces whatever the css layout produced.
    if len(grab.css_list("#resultStats")) > 0:
        counter_text = grab.css_text("#resultStats")
    if len(grab.xpath_list('//div[@id="subform_ctrl"]/div[2]')) > 0:
        counter_text = grab.xpath_text('//div[@id="subform_ctrl"]/div[2]')

    if counter_text is None:
        logging.error("Unknown google page format")
        return 0

    # Strip digit-group separators before looking for the number.
    counter_text = counter_text.replace(",", "").replace(".", "")

    # "about" has priority over "of"; with neither present the number is
    # looked up in the complete text.
    for keyword in ("about", "of"):
        if keyword in counter_text:
            return int(find_number(counter_text.split(keyword)[1]))
    return int(find_number(counter_text))
Ejemplo n.º 4
0
def find_node_number(node, ignore_spaces=False, make_int=True):
    """
    Locate a number inside the text content of `node`.

    The text is obtained with `get_node_text` and the search is delegated
    to `find_number`; `ignore_spaces` and `make_int` are forwarded to it
    unchanged.
    """

    return find_number(
        get_node_text(node),
        ignore_spaces=ignore_spaces,
        make_int=make_int,
    )
Ejemplo n.º 5
0
def find_node_number(node, ignore_spaces=False, make_int=True):
    """
    Return the number found in the text of `node`.

    This is a convenience wrapper: the node is converted to text with
    `get_node_text` and that text is handed to `find_number` together
    with the `ignore_spaces` and `make_int` options.
    """

    options = {'ignore_spaces': ignore_spaces, 'make_int': make_int}
    return find_number(get_node_text(node), **options)
Ejemplo n.º 6
0
 def test_find_number(self):
     spaced = 'foo 2 4 bar'

     # The first number in the text is returned, as an int by default.
     self.assertEqual(2, find_number('2'))
     self.assertEqual(2, find_number(spaced))
     self.assertEqual('2', find_number(spaced, make_int=False))

     # With ignore_spaces the separated digits are joined together.
     self.assertEqual(24, find_number(spaced, ignore_spaces=True))
     self.assertEqual(
         24, find_number(u'бешеный 2 4 барсук', ignore_spaces=True))

     # Text that contains no digits must raise DataNotFound.
     for digitless in ('foo', u'фыва'):
         self.assertRaises(DataNotFound, find_number, digitless)
Ejemplo n.º 7
0
 def number(self, default=NULL, ignore_spaces=False,
            smart=False, make_int=True):
     """
     Find a number in the text of this object.

     The text comes from `self.text(smart=smart)` and is searched with
     `find_number`.  If an IndexError is raised along the way, `default`
     is returned when one was supplied; otherwise the error propagates.
     """
     try:
         content = self.text(smart=smart)
         return find_number(content, ignore_spaces=ignore_spaces,
                            make_int=make_int)
     except IndexError:
         if default is not NULL:
             return default
         raise
Ejemplo n.º 8
0
 def test_find_number(self):
     # (expected result, input text, keyword options for find_number)
     cases = (
         (2, '2', {}),
         (2, 'foo 2 4 bar', {}),
         ('2', 'foo 2 4 bar', {'make_int': False}),
         (24, 'foo 2 4 bar', {'ignore_spaces': True}),
         (24, u'бешеный 2 4 барсук', {'ignore_spaces': True}),
     )
     for expected, text, options in cases:
         self.assertEqual(expected, find_number(text, **options))

     # Inputs without any digit are expected to raise DataNotFound.
     self.assertRaises(DataNotFound, find_number, 'foo')
     self.assertRaises(DataNotFound, find_number, u'фыва')
Ejemplo n.º 9
0
    def css_number(self, path, default=NULL, ignore_spaces=False, smart=False, make_int=True):
        """
        Return the number found in the text of the node matching css `path`.

        The text is read with `self.css_text(path, smart=smart)` and then
        searched with `find_number`.  When an IndexError is raised, the
        `default` is returned if one was given; otherwise the exception
        is re-raised.
        """

        try:
            return find_number(
                self.css_text(path, smart=smart),
                ignore_spaces=ignore_spaces,
                make_int=make_int,
            )
        except IndexError:
            if default is not NULL:
                return default
            raise
Ejemplo n.º 10
0
    def css_number(self, path, default=NULL, ignore_spaces=False, smart=False,
                   make_int=True):
        """
        Look up a number in the text of the node selected by css `path`.

        `self.css_text` supplies the text and `find_number` extracts the
        number from it.  An IndexError raised by either step is turned
        into `default` when a default was passed and is propagated when
        it was not.
        """

        try:
            node_text = self.css_text(path, smart=smart)
            found = find_number(node_text, ignore_spaces=ignore_spaces,
                                make_int=make_int)
            return found
        except IndexError:
            if default is not NULL:
                return default
            raise
Ejemplo n.º 11
0
 def number(self, default=NULL, ignore_spaces=False, smart=False,
            make_int=True):
     """
     Return the number contained in the text of this object.

     `self.text(smart=smart)` provides the text that `find_number`
     searches.  On IndexError the supplied `default` is returned; when
     no default was supplied the exception is re-raised.
     """
     try:
         return find_number(
             self.text(smart=smart),
             ignore_spaces=ignore_spaces,
             make_int=make_int,
         )
     except IndexError:
         if default is not NULL:
             return default
         raise
Ejemplo n.º 12
0
    def process_raw_value(self, value):
        """
        Convert the raw `value` into a number.

        * An empty string yields `self.empty_default` when one is
          configured (i.e. it is not NULL).
        * If any of `self.find_number`, `self.ignore_spaces` or
          `self.ignore_chars` is truthy, the result of
          `self.process(value)` is searched with `find_number()`, which
          receives the `ignore_spaces` and `ignore_chars` settings.
        * Otherwise the result of `self.process(value)` is stripped and
          converted with `int()`; a conversion failure propagates to the
          caller.
        """
        if self.empty_default is not NULL and value == "":
            return self.empty_default

        if self.find_number or self.ignore_spaces or self.ignore_chars:
            return find_number(self.process(value),
                               ignore_spaces=self.ignore_spaces,
                               ignore_chars=self.ignore_chars)

        # Getting here means ignore_chars and ignore_spaces are both
        # falsy, so no character or whitespace filtering applies and the
        # processed value is converted directly.
        return int(self.process(value).strip())
Ejemplo n.º 13
0
    def process_raw_value(self, value):
        """
        Turn the raw `value` into a number.

        * When `self.empty_default` is configured (not NULL) and `value`
          is the empty string, that default is returned.
        * When `self.find_number`, `self.ignore_spaces` or
          `self.ignore_chars` is truthy, `find_number()` is applied to
          `self.process(value)` with the instance's `ignore_spaces` and
          `ignore_chars` settings.
        * In every other case `self.process(value)` is stripped and fed
          to `int()`; any conversion error is left to the caller.
        """
        if self.empty_default is not NULL and value == "":
            return self.empty_default

        needs_search = (self.find_number or self.ignore_spaces
                        or self.ignore_chars)
        if needs_search:
            return find_number(self.process(value),
                               ignore_spaces=self.ignore_spaces,
                               ignore_chars=self.ignore_chars)

        # Neither ignore_chars nor ignore_spaces is set on this path, so
        # the processed value needs no filtering before the conversion.
        return int(self.process(value).strip())