# Imports these excerpts rely on (the original module header is not shown;
# the IpspiderItem import path below is an assumption -- adjust to the project):
import re

import scrapy
from scrapy.selector import Selector
# from ipspider.items import IpspiderItem


def parse_film(self, response):
    # The partially filled item is handed over from the listing callback.
    item = response.meta['item']
    # Scrapy responses expose .xpath() directly; wrapping them in
    # Selector(response=response) is redundant.
    film_rank = response.xpath(
        '/html/body/div[2]/div/div[1]/div[2]/div/ul/li[1]/p/text()'
    ).extract_first()
    # The view-count selector is not reliable yet, so use a placeholder:
    # film_viewcount = response.xpath('//*[@id="resource_views"]//text()').extract_first()
    film_viewcount = '0'
    film_class = response.xpath(
        '/html/body/div[2]/div/div[1]/div[1]/div[2]/div[2]/div/img/@src'
    ).extract_first()
    film_cover = response.xpath(
        '/html/body/div[2]/div/div[1]/div[1]/div[2]/div[1]/div[1]/a/img/@src'
    ).extract_first()
    # Keep only the text after the label's colon; partition() tolerates a
    # missing separator, where .index(':') would raise ValueError.
    if film_rank:
        film_rank = film_rank.partition(':')[2].strip()
    print(f'film_rank: {film_rank}')
    print(f'film_class: {film_class}')
    print(f'film_cover: {film_cover}')
    print(f'film_viewcount: {film_viewcount}')
    item['film_rank'] = film_rank
    item['film_class'] = film_class
    item['film_cover'] = film_cover
    item['film_viewcount'] = film_viewcount
    print(f"in parse_film, item: {item}")
    return item
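
# A minimal hardening sketch (an assumption, not the original spider's code):
# the absolute /html/body/... XPaths above break whenever the page layout
# shifts, so passing a default to extract_first() keeps the field populated
# instead of propagating None. extract_cover is a hypothetical helper that
# reuses the cover-image XPath from parse_film().
def extract_cover(response):
    return response.xpath(
        '/html/body/div[2]/div/div[1]/div[1]/div[2]/div[1]/div[1]/a/img/@src'
    ).extract_first(default='')
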
def parse(self, response):
    try:
        for div in response.xpath(
            "//div[@id='list']/table[@class='table']/tbody/tr/td[@class='ip']"
        ):
            # The site scatters the address across nested tags. findall()
            # yields the tag text for the capturing alternative and an empty
            # string for the non-capturing one, which marks the boundary
            # between IP and port.
            tag_str = re.findall(r'>(\S+)</|>\W+<', str(div.extract()))
            if not tag_str:
                continue
            ip_port = ""
            for temp_str in tag_str:
                if not temp_str:
                    # Empty match: insert the IP/port separator.
                    temp_str = ":"
                target_str = re.search(r'[^<>/]\d+|\d+\.|\.|\d+|:', temp_str)
                if target_str:
                    ip_port += target_str.group()
            print(ip_port)
            ip, port = ip_port.strip().split(":", 1)
            yield IpspiderItem(ip=ip, port=port)

        current_page = response.xpath(".//span[@class='current']/text()")
        if current_page:
            current_page = current_page.extract()[0]
            nextPage_list = Selector(response=response).re(
                r'<a href="(\S*)">\d+</a>'
            )
            # Find the first link whose page number is greater than the page
            # we are on; None means this is the last page.
            index = None
            for temp in nextPage_list:
                page_num = re.search(r'\d+', str(temp))
                if page_num and int(page_num.group()) > int(current_page):
                    index = nextPage_list.index(temp)
                    break
            if index is not None:
                next_page = nextPage_list[index]
                print(next_page)
                yield scrapy.Request(
                    url="http://www.goubanjia.com/free/" + str(next_page),
                    callback=self.parse,
                )
    except Exception as e:
        print(e)
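
# A standalone sketch of the next-page selection above, pulled out so it can
# be tested without a live response. The name pick_next_page is hypothetical;
# it mirrors the loop in parse(): return the first href whose page number is
# greater than the current page, or None when there is none.
def pick_next_page(hrefs, current_page):
    for href in hrefs:
        page_num = re.search(r'\d+', href)
        if page_num and int(page_num.group()) > int(current_page):
            return href
    return None

# Viewing page 2 with links to pages 1-3, page 3 is the one to follow:
assert pick_next_page(['index1.html', 'index2.html', 'index3.html'], '2') == 'index3.html'
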
def xt(cls, response):
    docs = []
    raw_docs = response.xpath(cls.LI_XPATH)
    for raw_doc in raw_docs:
        html_url, pdf_url = "", ""
        # Each entry may link to both an HTML and a PDF version.
        urls = raw_doc.css("a").xpath("@href").extract()
        for url in urls:
            if url.endswith(".pdf"):
                pdf_url = url
            elif url.endswith(".html"):
                html_url = url
        # The first anchor's text carries the title; keep only the part
        # before the slash. extract_first() tolerates a missing anchor and
        # partition() tolerates a title without a slash.
        title = raw_doc.xpath(".//a[1]/text()").extract_first() or ""
        title = title.partition("/")[0].strip()
        docs.append({"title": title, "html_url": html_url, "pdf_url": pdf_url})
    return docs
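
# A usage sketch with an invented fixture (the HTML, URL, and _Demo class are
# assumptions, not from the original project). It shows what LI_XPATH is
# expected to select and the shape of the dicts xt() returns.
from scrapy.http import HtmlResponse

class _Demo:
    LI_XPATH = "//ul/li"

_body = (b'<ul><li><a href="doc.html">A Sample Title / 2019</a>'
         b'<a href="doc.pdf">PDF</a></li></ul>')
_response = HtmlResponse(url="http://example.com/list", body=_body,
                         encoding="utf-8")
print(xt(_Demo, _response))
# [{'title': 'A Sample Title', 'html_url': 'doc.html', 'pdf_url': 'doc.pdf'}]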