def parse(self, response):
        """Extract two IP addresses from the JSON ``origin`` field of the body.

        The response text is decoded as JSON, its ``origin`` value is split on
        commas, and the first two entries (whitespace-stripped) are stored as
        ``res_ip`` and ``agent_ip`` on a new ``ProxyspiderItem``. The raw body
        and both addresses are printed.

        Raises:
            ValueError: if the body is not valid JSON.
            KeyError: if the decoded payload has no ``origin`` key.
            IndexError: if ``origin`` holds fewer than two comma-separated
                values.
        """
        # Function-scope import keeps this method self-contained.
        import json

        print(response.text)
        # The body comes from a remote server: decode it as data instead of
        # evaluating it as Python source.
        payload = json.loads(response.text)
        addresses = payload['origin'].split(',')

        item = ProxyspiderItem()
        item['res_ip'] = addresses[0].strip()
        item['agent_ip'] = addresses[1].strip()
        print(item['res_ip'])
        print(item['agent_ip'])
        yield item
Exemple #2
0
 def parse(self, response):
     """Yield one ``ProxyspiderItem`` per table row after the first.

     For each matched row, the text of the second and third ``td`` cells is
     passed as ``ip`` and ``port``. Any exception raised while handling a
     row is printed and that row is skipped.
     """
     document = etree.HTML(response.body)
     for row in document.xpath('//table/tr[position()>1]'):
         try:
             # position()>1 drops the first cell, so index 0 is the 2nd td.
             cells = row.xpath('./td[position()>1]')
             yield ProxyspiderItem(ip=cells[0].text, port=cells[1].text)
         except Exception as err:
             print(err)
Exemple #3
0
 def parse(self, response):
     """Yield a ``ProxyspiderItem`` for every ``//tbody/tr`` row.

     ``ip`` and ``port`` are the text of the first ``td`` whose
     ``data-title`` attribute is ``IP`` and ``PORT`` respectively. Each pair
     is also printed.

     Raises:
         IndexError: if a row has no matching ``IP`` or ``PORT`` cell.
     """
     for row in etree.HTML(response.body).xpath('//tbody/tr'):
         ip_cells = row.xpath('.//td[@data-title="IP"]')
         port_cells = row.xpath('.//td[@data-title="PORT"]')
         ip_text = ip_cells[0].text
         port_text = port_cells[0].text
         print("IP;{0},PORT:{1}".format(ip_text, port_text))
         yield ProxyspiderItem(ip=ip_text, port=port_text)
Exemple #4
0
 def parse(self, response):
     """Yield title, type and time for at most the first ten movie nodes.

     Each ``div`` with class ``movie-item-hover`` produces a plain ``dict``
     with the keys ``title``, ``type`` and ``time``; no ``ProxyspiderItem``
     is built because the yielded value is the dict.

     Raises:
         IndexError: if a node lacks the text entries indexed below.
     """
     movies = Selector(response).xpath('//div[@class="movie-item-hover"]')
     # Slicing caps the output at ten entries without a manual counter.
     for movie in movies[:10]:
         title = movie.xpath(
             './a/div/div[1]/span[1]/text()').extract()[0].strip()
         # The third descendant text node (index 2) carries the value.
         type_text = movie.xpath(
             './a/div/div[2]//text()').extract()[2].strip()
         time_text = movie.xpath(
             './a/div/div[4]//text()').extract()[2].strip()
         yield {'title': title, 'type': type_text, 'time': time_text}
 def parse(self, response):
     """Print the response URL and yield up to ten ``ProxyspiderItem``s.

     Each ``div`` with class ``movie-hover-info`` supplies the fields
     ``movie_name``, ``movie_categorys`` and ``plan_time``.

     Raises:
         IndexError: if a node has fewer than two text nodes under its
             second or fourth child ``div``.
     """
     print(response.url)
     selector = Selector(response=response)
     nodes = selector.xpath('''//div[@class='movie-hover-info']''')
     for count, node in enumerate(nodes, start=1):
         item = ProxyspiderItem()
         name = node.xpath('./div[1]/span[1]/text()').get()
         categories = node.xpath('./div[2]/text()').getall()[1].strip()
         planned = node.xpath('./div[4]/text()').getall()[1].strip()
         item['movie_name'] = name
         item['movie_categorys'] = categories
         item['plan_time'] = planned
         yield item
         # Stop once the tenth item has been handed out.
         if count >= 10:
             break
Exemple #6
0
    def parse(self, response):
        """Yield a ``ProxyspiderItem`` for the leading movie nodes.

        Each item carries ``title``, ``link`` (the ``href`` prefixed with
        ``https://maoyan.com``), ``time`` and ``category``. Iteration stops
        after the limit; nothing is yielded for the remaining nodes.

        Raises:
            IndexError: if a node has fewer than two text nodes under its
                second or fourth inner ``div``.
            TypeError: if a node has no matching ``href`` (the link is
                ``None`` and cannot be concatenated).
        """
        # NOTE(review): eleven entries are emitted, matching the previous
        # `counter <= 10` guard with a counter starting at 0 -- confirm
        # whether ten was intended.
        max_items = 11
        div_tags = Selector(response=response).xpath(
            '//dd/div/div[@class="movie-item-hover"]')
        # Slice instead of counting so no bare `yield` (None) is produced
        # for nodes past the limit.
        for div_tag in div_tags[:max_items]:
            title = div_tag.xpath(
                './a/div/div/span[@class="name "]/text()').extract_first()
            link = div_tag.xpath(
                './a[@data-act="movie-click"]/@href').extract_first()
            category = div_tag.xpath(
                './a/div/div[2]/text()').extract()[1].strip('\n').strip()
            show_time = div_tag.xpath(
                './a/div/div[4]/text()').extract()[1].strip('\n').strip()

            item = ProxyspiderItem()
            item['title'] = title
            item['link'] = 'https://maoyan.com' + link
            item['time'] = show_time
            item['category'] = category
            yield item
Exemple #7
0
    def parse(self, response):
        """Yield up to ten ``ProxyspiderItem``s from ``movie-hover-info`` nodes.

        Fields set on each item: ``movie_name``, ``movie_score`` (falls back
        to the default string when no score node matches), ``movie_type``,
        ``movie_actor`` and ``movie_time``.

        Raises:
            IndexError: if one of the sibling ``div`` lookups returns fewer
                than two text nodes.
        """
        def second_text(node, path):
            # Second text node matched by ``path``, as a stripped str.
            return str(node.xpath(path).getall()[1]).strip()

        hover_nodes = Selector(response=response).xpath(
            '//div[@class="movie-hover-info"]')
        # Only the first ten nodes are processed.
        for node in hover_nodes[:10]:
            item = ProxyspiderItem()
            item['movie_name'] = node.xpath('./div/span/text()').get()

            score_nodes = node.xpath(
                './div/span[@class="score channel-detail-orange"]')
            item['movie_score'] = score_nodes.xpath('string(.)').get(
                default='暂无')

            item['movie_type'] = second_text(
                node, './div[1]/following-sibling::div[1]/text()')
            item['movie_actor'] = second_text(
                node, './div[1]/following-sibling::div[2]/text()')
            item['movie_time'] = second_text(
                node, './div[1]/following-sibling::div[3]/text()')

            yield item
Exemple #8
0
 def parse(self, response):
     """Yield one ``ProxyspiderItem`` whose ``ip`` is the JSON ``origin`` value.

     Raises:
         ValueError: if the response text is not valid JSON.
         KeyError: if the decoded payload has no ``origin`` key.
     """
     record = ProxyspiderItem()
     payload = json.loads(response.text)
     record['ip'] = payload['origin']
     yield record
Exemple #9
0
 def parse(self, response):
     """Return a one-element list holding an item with the raw response text.

     The whole response text is stored under the ``ipaddr`` field of a new
     ``ProxyspiderItem``.
     """
     entry = ProxyspiderItem()
     entry['ipaddr'] = response.text
     return [entry]