Esempio n. 1
0
 def parse_content(self, response):
     """Yield one item for the first ``img.screen`` image on the page.

     The category is taken from ``response.meta['category']`` and the
     extracted ``src`` is appended to the ``http://www.goodwp.com/`` prefix.
     Nothing is yielded when the page has no matching image.
     """
     category = response.meta['category']
     img_urls = response.xpath('//img[@class="screen"]/@src').extract()
     if not img_urls:
         # extract() returns an empty list when the XPath matches nothing;
         # indexing it would raise IndexError inside the callback.
         return
     item = ImgsItem()
     item["category"] = category
     item["image_urls"] = ["http://www.goodwp.com/" + img_urls[0]]
     yield item
Esempio n. 2
0
 def parse_content(self, response):
     """Yield one item for the first ``a.img > img`` image on the page.

     The category is taken from ``response.meta['category']`` and the
     extracted ``src`` is appended to the ``https://bonpic.com`` prefix.
     Nothing is yielded when the page has no matching image.
     """
     category = response.meta['category']
     img_urls = response.xpath('//a[@class="img"]/img/@src').extract()
     if not img_urls:
         # An empty extraction would make img_urls[0] raise IndexError.
         return
     item = ImgsItem()
     item["category"] = category
     item["image_urls"] = ["https://bonpic.com" + img_urls[0]]
     yield item
Esempio n. 3
0
 def parse_content(self, response):
     """Yield one item for the first image inside ``div.png_big``.

     The category is taken from ``response.meta['category']`` and the
     extracted ``src`` is appended to the ``http://pngimg.com`` prefix.
     Nothing is yielded when the page has no matching image.
     """
     category = response.meta['category']
     img_urls = response.xpath('//div[@class="png_big"]/a/img/@src').extract()
     if not img_urls:
         # An empty extraction would make img_urls[0] raise IndexError.
         return
     item = ImgsItem()
     item["category"] = category
     item["image_urls"] = ["http://pngimg.com" + img_urls[0]]
     yield item
Esempio n. 4
0
    def parse(self, response):
        """Collect image-looking URLs from the response and request the next page.

        Every http(s) URL found in the response text is unescaped and kept
        when it contains one of the image keywords.  All kept URLs are
        yielded in a single item under ``self.category``.  While the page
        still contains links, a follow-up request is yielded with the
        ``ijn`` page number (read from ``response.url``) incremented by one.
        """
        content = response.text
        pattern_url = re.compile(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
        )
        links = pattern_url.findall(content)
        keyword = (
            "p=tbn", "images", "image", "jpg", "png", "jpeg", "PNG", "JPG",
            "JPEG",
        )
        img_urls = []
        for link in links:
            try:
                # Turns escape sequences embedded in the page (e.g. \u003d)
                # back into the characters they stand for.
                url = link.encode("utf8").decode('unicode_escape')
            except UnicodeDecodeError:
                # A truncated or malformed backslash escape in one link must
                # not abort processing of the whole page.
                continue
            if any(word in url for word in keyword):
                img_urls.append(url)

        item = ImgsItem()
        item["category"] = self.category
        item["image_urls"] = img_urls
        yield item

        if links:
            page_match = re.search('&ijn=(.*?)&start=', response.url)
            if page_match is None:
                # Without an ijn parameter the next page number cannot be
                # derived, so pagination stops here instead of crashing.
                return
            current_num = page_match.group(1)
            url = 'https://www.google.com/search?ei=aM0JXfmZBcqD8gXIy7ww&yv=3&q={category}&tbm=isch&vet=10ahUKEwj57YWU7fTiAhXKgbwKHcglDwYQuT0ITCgB.aM0JXfmZBcqD8gXIy7ww.i&ved=0ahUKEwj57YWU7fTiAhXKgbwKHcglDwYQuT0ITCgB&ijn={page}&start={page}00&asearch=ichunk&async=_id:rg_s,_pms:s,_fmt:pc'.format(
                category=quote(self.category), page=int(current_num) + 1)
            # dont_filter=True bypasses Scrapy's duplicate-request filter.
            yield scrapy.Request(url=url,
                                 callback=self.parse,
                                 dont_filter=True)
Esempio n. 5
0
 def parse_content(self, response):
     """Yield a single item with every ``src`` of the ``#photoImage`` element.

     The item's category comes from ``response.meta['category']``.
     """
     page_category = response.meta['category']
     sources = response.xpath('//*[@id="photoImage"]/@src').extract()
     entry = ImgsItem()
     entry["category"] = page_category
     entry["image_urls"] = sources
     yield entry
Esempio n. 6
0
 def parse_content(self, response):
     """Yield one item for the first ``img#mainImg`` source on the page.

     The extracted ``src`` is prefixed with ``http:`` (presumably because
     the page uses scheme-relative URLs -- confirm against the site) and
     stored under ``self.category``.  Nothing is yielded when the page has
     no matching image.
     """
     img_urls = response.xpath('//img[@id="mainImg"]/@src').extract()
     if not img_urls:
         # An empty extraction would make img_urls[0] raise IndexError.
         return
     img_url = "http:" + img_urls[0]
     item = ImgsItem()
     item["image_urls"] = [img_url]
     item["category"] = self.category
     yield item
Esempio n. 7
0
 def parse_content(self, response):
     """Yield one item for the first image in the ``p-slider__nav`` list.

     The extracted ``src`` is appended to the
     ``https://ec.shop.acure-fun.net`` prefix and stored under the fixed
     category ``image_acure``.  Nothing is yielded when the page has no
     matching image.
     """
     img_urls = response.xpath(
         '//ul[@class="p-slider__nav"]/li/img/@src').extract()
     if not img_urls:
         # An empty extraction would make img_urls[0] raise IndexError.
         return
     item = ImgsItem()
     item["image_urls"] = ["https://ec.shop.acure-fun.net" + img_urls[0]]
     item["category"] = "image_acure"
     yield item
Esempio n. 8
0
 def parse_item(self, response):
     """Yield one item per image ``src`` found anywhere under ``#post_content``."""
     sources = response.xpath('//*[@id="post_content"]//img/@src').extract()
     for src in sources:
         entry = ImgsItem()
         entry['url'] = src
         yield entry
Esempio n. 9
0
 def parse_content(self, response):
     """Yield one item for the first image inside ``div.productWrap``.

     The extracted ``src`` is appended to the
     ``https://www.ajinomoto.co.jp/`` prefix and stored under the fixed
     category ``image_ajinomoto``.  Nothing is yielded when the page has no
     matching image.
     """
     img_urls = response.xpath(
         '//div[@class="productWrap"]/p/img/@src').extract()
     if not img_urls:
         # An empty extraction would make img_urls[0] raise IndexError.
         return
     item = ImgsItem()
     item["image_urls"] = ["https://www.ajinomoto.co.jp/" + img_urls[0]]
     item["category"] = "image_ajinomoto"
     yield item
Esempio n. 10
0
 def parse_content(self, response):
     """Yield a single ``image_japantrendshop`` item.

     The item carries every ``src`` of images nested in
     ``a.product-info-zoom`` links.
     """
     zoom_xpath = '//a[@class="product-info-zoom"]/img/@src'
     sources = response.xpath(zoom_xpath).extract()
     entry = ImgsItem()
     entry["image_urls"] = sources
     entry["category"] = "image_japantrendshop"
     yield entry
Esempio n. 11
0
 def parse_content(self, response):
     """Yield a single item with the image sources of ``div#featured-img-id``.

     The item is filed under ``self.category``.
     """
     featured_xpath = '//div[@id="featured-img-id"]/img/@src'
     sources = response.xpath(featured_xpath).extract()
     entry = ImgsItem()
     entry["category"] = self.category
     entry["image_urls"] = sources
     yield entry
Esempio n. 12
0
 def parse(self, response):
     """Yield a single item with every thumbnail image source in the page.

     The response text is parsed with ``etree.HTML`` and the ``src`` of each
     ``img[itemprop="thumbnail"]`` is collected under ``self.category``.
     """
     tree = etree.HTML(response.text)
     thumbnails = tree.xpath('//img[@itemprop="thumbnail"]/@src')
     entry = ImgsItem()
     entry["category"] = self.category
     entry["image_urls"] = thumbnails
     yield entry
Esempio n. 13
0
 def parse_content(self, response):
     """Yield a single item with every image ``src`` found under ``div/p``.

     The item's category comes from ``response.meta['category']``.
     """
     category = response.meta['category']
     # The leftover debug print of the extracted list was removed so the
     # callback no longer writes to stdout on every page.
     img_urls = response.xpath('//div/p//img/@src').extract()
     item = ImgsItem()
     item["category"] = category
     item["image_urls"] = img_urls
     yield item
Esempio n. 14
0
 def parse_content(self, response):
     """Yield a single ``image_asahiya`` item with the post thumbnail sources.

     Only ``<img>`` elements whose ``class`` attribute equals the exact
     thumbnail class string are matched.
     """
     thumbnail_xpath = (
         '//img[@class="attachment-post-thumbnail size-post-thumbnail wp-post-image"]/@src'
     )
     sources = response.xpath(thumbnail_xpath).extract()
     entry = ImgsItem()
     entry["image_urls"] = sources
     entry["category"] = "image_asahiya"
     yield entry
Esempio n. 15
0
 def parse(self, response):
     """Yield one ``image_kagome`` item per image inside ``p.imgPart``.

     Each extracted ``src`` is appended to the ``https://www.kagome.co.jp``
     prefix.
     """
     for src in response.xpath('//p[@class="imgPart"]/img/@src').extract():
         full_url = "https://www.kagome.co.jp" + src
         entry = ImgsItem()
         entry["image_urls"] = [full_url]
         entry["category"] = "image_kagome"
         yield entry
Esempio n. 16
0
 def parse_content(self, response):
     """Yield a single item with the gallery opener's zoom image URLs.

     The URLs come from the ``data-zoom-image`` attribute; the category is
     the first element of ``response.meta['pattern']``.
     """
     pattern = response.meta['pattern']
     zoom_xpath = '//a[@id="detailsGallery_opener"]/img/@data-zoom-image'
     zoom_sources = response.xpath(zoom_xpath).extract()
     entry = ImgsItem()
     entry["image_urls"] = zoom_sources
     entry["category"] = pattern[0]
     yield entry
Esempio n. 17
0
 def parse(self, response):
     """Yield a single item with every download link matched in the page text.

     Links are captured by a regex on the
     ``data-photo-modal-image-download-link`` attribute and stored under
     ``self.category``.
     """
     download_links = re.findall(
         r"data-photo-modal-image-download-link=\\'(https://images.pexels.com/photos/.*?\.jpg&fm=jpg)\\",
         response.text)
     entry = ImgsItem()
     entry["category"] = self.category
     entry["image_urls"] = download_links
     yield entry
Esempio n. 18
0
 def parse_url(self, response):
     """Yield a single item with every ``timgsa.baidu.com`` ``src`` in the page.

     The URLs are captured from the raw response text with a regex and
     stored under ``self.category``.
     """
     src_regex = re.compile(r'src="(https://timgsa.baidu.com/timg.*?g.*?)"')
     matches = src_regex.findall(response.text)
     entry = ImgsItem()
     entry["category"] = self.category
     entry["image_urls"] = matches
     yield entry
Esempio n. 19
0
 def parse_content(self, response):
     """Yield one item for the first ``a.modal-open`` image on the page.

     The extracted ``src`` is appended to the ``https://www.cecile.co.jp``
     prefix.  The category is the text of the second breadcrumb entry
     followed by ``image_cecile``.  Nothing is yielded when either the
     image or the breadcrumb text is missing.
     """
     img_urls = response.xpath(
         '//a[@class="modal-open"]/span/img/@src').extract()
     category = response.xpath(
         '//*[@id="breadclumb"]/ol/li[2]/a//text()').extract()
     if not img_urls or not category:
         # Either extraction may be empty; indexing [0] would then raise
         # IndexError inside the callback.
         return
     item = ImgsItem()
     item["image_urls"] = ["https://www.cecile.co.jp" + img_urls[0]]
     item["category"] = category[0] + 'image_cecile'
     yield item
Esempio n. 20
0
 def parse_content(self, response):
     """Yield one ``image_takeya`` item per entry of the decoded ``results`` list.

     Each item carries the entry's ``image_url`` value; a missing
     ``results`` key is treated as an empty list.
     """
     payload = demjson.decode(response.text)
     for record in payload.get("results", []):
         image_url = record.get("image_url")
         entry = ImgsItem()
         entry["category"] = "image_takeya"
         entry["image_urls"] = [image_url]
         yield entry
Esempio n. 21
0
 def parse(self, response):
     """Yield one item per entry of the decoded ``collection`` list.

     Each item carries the entry's ``thumbnail`` value under
     ``self.category``.
     """
     payload = demjson.decode(response.text)
     for record in payload["collection"]:
         thumbnail = record["thumbnail"]
         entry = ImgsItem()
         entry["category"] = self.category
         entry["image_urls"] = [thumbnail]
         yield entry
Esempio n. 22
0
 def parse(self, response):
     """Yield one item per entry of the decoded ``list`` array.

     Each item carries the entry's ``img`` value under ``self.category``; a
     missing ``list`` key is treated as an empty list.
     """
     payload = demjson.decode(response.text)
     for record in payload.get("list", []):
         image_url = record.get("img")
         entry = ImgsItem()
         entry["category"] = self.category
         entry["image_urls"] = [image_url]
         yield entry
Esempio n. 23
0
 def parse(self, response):
     """Yield one item per entry of the decoded ``items`` array.

     Each item carries the entry's ``pic_url`` value under
     ``self.category``; a missing ``items`` key is treated as an empty list.
     """
     # NOTE(review): demjson is used instead of json.loads -- presumably the
     # payload is not strictly valid JSON; confirm against a live response.
     payload = demjson.decode(response.text)
     for record in payload.get("items", []):
         picture = record.get("pic_url")
         entry = ImgsItem()
         entry["category"] = self.category
         entry["image_urls"] = [picture]
         yield entry
Esempio n. 24
0
 def parse(self, response):
     """Yield one item per entry of the decoded ``items`` array.

     Each item carries the entry's ``pic_url`` value; the category comes
     from ``response.meta['category']``.  A missing ``items`` key is treated
     as an empty list.
     """
     page_category = response.meta['category']
     payload = demjson.decode(response.text)
     for record in payload.get("items", []):
         picture = record.get("pic_url")
         entry = ImgsItem()
         entry["category"] = page_category
         entry["image_urls"] = [picture]
         yield entry
Esempio n. 25
0
 def parse(self, response):
     """Yield one item per entry of the decoded ``results`` list.

     Each item carries the entry's ``urls["raw"]`` value under
     ``self.category``.
     """
     payload = demjson.decode(response.text)
     for record in payload["results"]:
         raw_url = record["urls"]["raw"]
         entry = ImgsItem()
         entry["category"] = self.category
         entry["image_urls"] = [raw_url]
         yield entry
Esempio n. 26
0
 def parse_url(self, response):
     """Yield one ``image_meiji`` item per product link on the page.

     For every first link inside ``li.row > p`` the product id is taken
     from the part of the href between "<digit>/" and ".html" and inserted
     into the catalogue image URL.  Hrefs that do not match that shape are
     skipped.
     """
     img_patterns = response.xpath(
         '//li[@class="row"]/p/a[1]/@href').extract()
     # Raw string: "\d", "\/" and "\." are not valid str escapes and trigger
     # invalid-escape warnings in a normal literal.  Compiled once here
     # rather than on every loop iteration.
     id_regex = re.compile(r"\d\/(.*?)\.html")
     for img_pattern in img_patterns:
         found = id_regex.search(img_pattern)
         if found is None:
             # A single non-matching href must not abort the remaining links.
             continue
         img_url = "https://catalog-p.meiji.co.jp/imageDisp.php?type=product&id={}".format(
             found.group(1))
         item = ImgsItem()
         item["image_urls"] = [img_url]
         item["category"] = "image_meiji"
         yield item
Esempio n. 27
0
 def parse_content(self, response):
     """Yield one ``image_yamazakipan`` item for the page's package image.

     The image URL is built from ``response.url`` by replacing the part
     captured after "0<digit>/" (up to ".html") with the first ``src``
     found in ``div.img-package``.  Nothing is yielded when the image is
     missing or the page URL does not have the expected shape.
     """
     img_urls = response.xpath(
         '//div[@class="img-package"]/img/@src').extract()
     res_url = response.url
     pattern = re.findall(r"0\d\/(.*\.html)", res_url)
     if not img_urls or not pattern:
         # Either list may be empty; indexing [0] would then raise
         # IndexError inside the callback.
         return
     # The leftover debug print of the built URL was removed.
     url = res_url.replace(pattern[0], img_urls[0])
     item = ImgsItem()
     item["image_urls"] = [url]
     item["category"] = "image_yamazakipan"
     yield item
Esempio n. 28
0
 def parse_content(self, response):
     """Yield one item for the first ``BigImage`` image on the page.

     The extracted ``src`` is prefixed with ``http:``.  The category is the
     second breadcrumb link text with every "/" removed, followed by
     ``image_dccoffee``.  Nothing is yielded when either the image or the
     breadcrumb text is missing.
     """
     category = response.xpath('//*[@id="contents"]/div[1]/div/ol/li[2]/a/text()').extract()
     # The trailing space inside the class value is part of the selector.
     img_urls = response.xpath('//img[@class="BigImage "]/@src').extract()
     if not category or not img_urls:
         # Either extraction may be empty; indexing [0] would then raise
         # IndexError inside the callback.
         return
     item = ImgsItem()
     item["image_urls"] = ["http:" + img_urls[0]]
     # Both original branches assigned the same value; stripping "/" is a
     # no-op when the text has none, so one expression covers both.
     item["category"] = category[0].replace("/", "") + 'image_dccoffee'
     yield item
 def parse_content(self, response):
     """Yield one item for the first ``BigImage`` image on the page.

     The extracted ``src`` is prefixed with ``http:``.  The category is the
     first element of ``response.meta['category']`` with every "/" removed,
     followed by ``image_coffeefriend``.  Nothing is yielded when the image
     or the category is missing.
     """
     category = response.meta['category']
     # The trailing space inside the class value is part of the selector.
     img_urls = response.xpath('//img[@class="BigImage "]/@src').extract()
     if not img_urls or not category:
         # Indexing [0] on an empty sequence would raise IndexError inside
         # the callback.
         return
     item = ImgsItem()
     item["image_urls"] = ["http:" + img_urls[0]]
     # NOTE(review): assumes meta['category'] is a list of strings; if it is
     # a plain string, [0] yields only its first character -- confirm
     # against the caller.
     # Both original branches assigned the same value; stripping "/" is a
     # no-op when the text has none, so one expression covers both.
     item["category"] = category[0].replace("/", "") + "image_coffeefriend"
     yield item
Esempio n. 30
0
 def parse(self, response):
     """Yield one item per entry of the decoded ``data`` list.

     Each image URL is the entry's ``url["baseUrl"]`` with ``!p4`` appended;
     the category comes from ``response.meta['category']``.
     """
     page_category = response.meta['category']
     payload = demjson.decode(response.text)
     for record in payload["data"]:
         sized_url = record["url"]["baseUrl"] + "!p4"
         entry = ImgsItem()
         entry["category"] = page_category
         entry["image_urls"] = [sized_url]
         yield entry