def processPaper(self, response):
    """Extract a paper's abstract from its detail page, then chain a
    request for the paper's PDF.

    The abstract is assumed to lie between the word "abstract" and the
    "[abs]" marker in the page's flattened text.  The enriched item is
    forwarded to processPDF via request.meta.
    """
    # Flatten every text node of the page into a single string.
    text = Selector(text=response.body).xpath('//text()').extract()
    text = ''.join(text)
    # Case-insensitive search for the two markers; slicing is done on
    # the original-cased text.
    abstract_index = text.lower().find("abstract")
    abs_index = text.lower().find("[abs]")
    # +8 skips over the word "abstract" itself.
    # NOTE(review): if either marker is absent, find() returns -1 and
    # the slice silently yields wrong text -- confirm markers always
    # exist on these pages.
    abstract = text[abstract_index + 8:abs_index]
    abstract = abstract.replace("\n", " ").strip()
    item = response.meta['item']
    item['abstract'] = abstract
    # Chain a request for the PDF link, carrying the item along.
    # FIX: the original passed the bare name `processPDF`, which is not
    # in scope inside a method; the callback is the sibling spider
    # method, so it must be accessed through self.
    request = scrapy.Request(item['url'], callback=self.processPDF)
    request.meta['item'] = item
    yield request
def parse(self, response): """Parse the search result of an author""" author_name = response.meta['author'] print "Parsing author {}".format(author_name) try: # Extract the panel on the right containing author results panel = response.xpath( '//*[@class="six columns omega"]').extract_first() # Check that the Matching Pages includes 'Authors' matching_pages = Selector( text=panel).xpath('//*[@class="bq_s"]').extract_first() matching_pages_text = Selector( text=matching_pages).xpath('//text()').extract() if "Authors" not in matching_pages_text: raise ValueError("No author page found!") # Get the list of authors by extracting the first row matches_row = Selector(text=matching_pages).xpath( '//*[@class="row"]').extract_first() matches = Selector( text=matches_row).xpath('//*[@class="bqLn"]').extract() # Find matching author and extract their url author_href = None for match in matches: match_name = Selector( text=match).xpath('//a//text()').extract_first() if match_name.lower() == author_name.lower(): author_href = Selector( text=match).xpath('//a/@href').extract_first() author_href = response.urljoin(author_href) break if not author_href: raise ValueError("No matching author found!") except Exception, e: print "Error parsing author: {}".format(author_name) print str(e) with open(missing_authors_file, 'a') as f: f.write(author_name + '\n') return