def parse_content(self, response):
    """Yield an ImgsItem for the first screen-class image on a goodwp.com page.

    The category is carried through request meta by the listing callback.
    """
    category = response.meta['category']
    img_urls = response.xpath('//img[@class="screen"]/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    item = ImgsItem()
    item["category"] = category
    item["image_urls"] = ["http://www.goodwp.com/" + img_urls[0]]
    yield item
def parse_content(self, response):
    """Yield an ImgsItem for the first img-class anchor image on a bonpic.com page.

    The category is carried through request meta by the listing callback.
    """
    category = response.meta['category']
    img_urls = response.xpath('//a[@class="img"]/img/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    item = ImgsItem()
    item["category"] = category
    item["image_urls"] = ["https://bonpic.com" + img_urls[0]]
    yield item
def parse_content(self, response):
    """Yield an ImgsItem for the first png_big image on a pngimg.com page.

    The category is carried through request meta by the listing callback.
    """
    category = response.meta['category']
    img_urls = response.xpath('//div[@class="png_big"]/a/img/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    item = ImgsItem()
    item["category"] = category
    item["image_urls"] = ["http://pngimg.com" + img_urls[0]]
    yield item
def parse(self, response):
    """Scrape image URLs from a Google Images async-chunk response, then paginate.

    Every absolute URL in the raw page text is collected; those containing an
    image-looking marker are yielded together as one ImgsItem, after which the
    next chunk (ijn/start incremented) is requested with the same callback.
    """
    content = response.text
    pattern_url = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    links = re.findall(pattern_url, content)
    # Heuristic markers that a URL points at an image resource.
    keyword = [
        "p=tbn", "images", "image", "jpg", "png", "jpeg", "PNG", "JPG", "JPEG"
    ]
    img_urls = []
    for link in links:
        # Unescape \uXXXX sequences embedded in the JS payload.
        url = link.encode("utf8").decode('unicode_escape')
        # Generator instead of a throwaway list inside any().
        if any(word in url for word in keyword):
            img_urls.append(url)
    item = ImgsItem()
    item["category"] = self.category
    item["image_urls"] = img_urls
    yield item
    if links:
        # &ijn= counts result chunks; keep requesting until no links come back.
        nums = re.findall('&ijn=(.*?)&start=', response.url)
        if not nums:
            # Unexpected URL shape: stop paginating instead of raising IndexError.
            return
        current_num = nums[0]
        url = 'https://www.google.com/search?ei=aM0JXfmZBcqD8gXIy7ww&yv=3&q={category}&tbm=isch&vet=10ahUKEwj57YWU7fTiAhXKgbwKHcglDwYQuT0ITCgB.aM0JXfmZBcqD8gXIy7ww.i&ved=0ahUKEwj57YWU7fTiAhXKgbwKHcglDwYQuT0ITCgB&ijn={page}&start={page}00&asearch=ichunk&async=_id:rg_s,_pms:s,_fmt:pc'.format(
            category=quote(self.category), page=int(current_num) + 1)
        yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)
def parse_content(self, response):
    """Yield an ImgsItem holding every #photoImage src on the page.

    The category is carried through request meta by the listing callback.
    """
    item = ImgsItem()
    item["category"] = response.meta['category']
    item["image_urls"] = response.xpath('//*[@id="photoImage"]/@src').extract()
    yield item
def parse_content(self, response):
    """Yield an ImgsItem for the first #mainImg source, scheme-prefixed."""
    img_urls = response.xpath('//img[@id="mainImg"]/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    # Source srcs are protocol-relative ("//host/..."); prepend the scheme.
    img_url = "http:" + img_urls[0]
    item = ImgsItem()
    item["image_urls"] = [img_url]
    item["category"] = self.category
    yield item
def parse_content(self, response):
    """Yield an ImgsItem for the first slider-nav image on an acure shop page."""
    img_urls = response.xpath(
        '//ul[@class="p-slider__nav"]/li/img/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    item = ImgsItem()
    item["image_urls"] = ["https://ec.shop.acure-fun.net" + img_urls[0]]
    item["category"] = "image_acure"
    yield item
def parse_item(self, response):
    """Yield one ImgsItem per <img> found inside the #post_content element."""
    srcs = response.xpath('//*[@id="post_content"]//img/@src').extract()
    for src in srcs:
        entry = ImgsItem()
        entry['url'] = src
        yield entry
def parse_content(self, response):
    """Yield an ImgsItem for the first product image on an ajinomoto.co.jp page."""
    img_urls = response.xpath(
        '//div[@class="productWrap"]/p/img/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    item = ImgsItem()
    item["image_urls"] = ["https://www.ajinomoto.co.jp/" + img_urls[0]]
    item["category"] = "image_ajinomoto"
    yield item
def parse_content(self, response):
    """Yield every product-info-zoom image URL as one japantrendshop ImgsItem."""
    item = ImgsItem()
    item["category"] = "image_japantrendshop"
    item["image_urls"] = response.xpath(
        '//a[@class="product-info-zoom"]/img/@src').extract()
    yield item
def parse_content(self, response):
    """Yield every image inside #featured-img-id under the spider's category."""
    item = ImgsItem()
    item["category"] = self.category
    item["image_urls"] = response.xpath(
        '//div[@id="featured-img-id"]/img/@src').extract()
    yield item
def parse(self, response):
    """Collect itemprop="thumbnail" image sources via an etree parse and yield them."""
    tree = etree.HTML(response.text)
    thumbnails = tree.xpath('//img[@itemprop="thumbnail"]/@src')
    item = ImgsItem()
    item["category"] = self.category
    item["image_urls"] = thumbnails
    yield item
def parse_content(self, response):
    """Yield every //div/p//img source as one ImgsItem.

    The category is carried through request meta by the listing callback.
    """
    category = response.meta['category']
    img_urls = response.xpath('//div/p//img/@src').extract()
    # Debug print(img_urls) removed.
    item = ImgsItem()
    item["category"] = category
    item["image_urls"] = img_urls
    yield item
def parse_content(self, response):
    """Yield the WordPress post-thumbnail image(s) as an asahiya ImgsItem."""
    item = ImgsItem()
    item["category"] = "image_asahiya"
    item["image_urls"] = response.xpath(
        '//img[@class="attachment-post-thumbnail size-post-thumbnail wp-post-image"]/@src'
    ).extract()
    yield item
def parse(self, response):
    """Yield one ImgsItem per imgPart image, prefixed with the kagome.co.jp host."""
    for src in response.xpath('//p[@class="imgPart"]/img/@src').extract():
        item = ImgsItem()
        item["image_urls"] = ["https://www.kagome.co.jp" + src]
        item["category"] = "image_kagome"
        yield item
def parse_content(self, response):
    """Yield the details-gallery zoom image URLs; the category comes from meta.

    NOTE(review): ``pattern`` from meta is indexed with [0] — presumably a
    sequence whose first element is the category name; confirm at the caller.
    """
    pattern = response.meta['pattern']
    item = ImgsItem()
    item["category"] = pattern[0]
    item["image_urls"] = response.xpath(
        '//a[@id="detailsGallery_opener"]/img/@data-zoom-image').extract()
    yield item
def parse(self, response):
    """Pull pexels.com download links out of the raw page text and yield them."""
    download_links = re.findall(
        r"data-photo-modal-image-download-link=\\'(https://images.pexels.com/photos/.*?\.jpg&fm=jpg)\\",
        response.text)
    item = ImgsItem()
    item["category"] = self.category
    item["image_urls"] = download_links
    yield item
def parse_url(self, response):
    """Yield every timgsa.baidu.com thumbnail URL found in the page source."""
    matches = re.findall(r'src="(https://timgsa.baidu.com/timg.*?g.*?)"',
                         response.text)
    item = ImgsItem()
    item["category"] = self.category
    item["image_urls"] = matches
    yield item
def parse_content(self, response):
    """Yield an ImgsItem for the first modal-open image on a cecile.co.jp page.

    The category is the second breadcrumb entry plus a site suffix.
    """
    img_urls = response.xpath(
        '//a[@class="modal-open"]/span/img/@src').extract()
    category = response.xpath(
        '//*[@id="breadclumb"]/ol/li[2]/a//text()').extract()
    # Guard: missing image or breadcrumb used to raise IndexError on [0].
    if not img_urls or not category:
        return
    item = ImgsItem()
    item["image_urls"] = ["https://www.cecile.co.jp" + img_urls[0]]
    item["category"] = category[0] + 'image_cecile'
    yield item
def parse_content(self, response):
    """Yield one ImgsItem per "results" entry in the takeya JSON payload."""
    payload = demjson.decode(response.text)
    for entry in payload.get("results", []):
        item = ImgsItem()
        item["category"] = "image_takeya"
        item["image_urls"] = [entry.get("image_url")]
        yield item
def parse(self, response):
    """Yield a thumbnail ImgsItem for each entry of the JSON "collection" list."""
    payload = demjson.decode(response.text)
    for entry in payload["collection"]:
        item = ImgsItem()
        item["category"] = self.category
        item["image_urls"] = [entry["thumbnail"]]
        yield item
def parse(self, response):
    """Yield one ImgsItem per "list" entry in the JSON response."""
    payload = demjson.decode(response.text)
    for entry in payload.get("list", []):
        item = ImgsItem()
        item["category"] = self.category
        item["image_urls"] = [entry.get("img")]
        yield item
def parse(self, response):
    """Yield one ImgsItem per "items" entry in the JSON response.

    demjson is used instead of json.loads to tolerate lax JSON in the payload.
    """
    payload = demjson.decode(response.text)
    for entry in payload.get("items", []):
        item = ImgsItem()
        item["category"] = self.category
        item["image_urls"] = [entry.get("pic_url")]
        yield item
def parse(self, response):
    """Yield one ImgsItem per "items" entry; the category comes from meta."""
    category = response.meta['category']
    payload = demjson.decode(response.text)
    for entry in payload.get("items", []):
        item = ImgsItem()
        item["category"] = category
        item["image_urls"] = [entry.get("pic_url")]
        yield item
def parse(self, response):
    """Yield the raw-size URL of each result in the JSON "results" list."""
    payload = demjson.decode(response.text)
    for entry in payload["results"]:
        item = ImgsItem()
        item["category"] = self.category
        item["image_urls"] = [entry["urls"]["raw"]]
        yield item
def parse_url(self, response):
    """Yield one meiji catalog image URL per product link on the listing page.

    The product id is extracted from each href and substituted into the
    imageDisp.php URL.
    """
    img_patterns = response.xpath(
        '//li[@class="row"]/p/a[1]/@href').extract()
    for img_pattern in img_patterns:
        # Raw string: "\d" in a plain literal is an invalid escape sequence
        # (DeprecationWarning); the pattern value is unchanged.
        pattern = re.findall(r"\d\/(.*?)\.html", img_pattern)
        # Skip hrefs that don't match instead of raising IndexError on [0].
        if not pattern:
            continue
        img_url = "https://catalog-p.meiji.co.jp/imageDisp.php?type=product&id={}".format(
            pattern[0])
        item = ImgsItem()
        item["image_urls"] = [img_url]
        item["category"] = "image_meiji"
        yield item
def parse_content(self, response):
    """Yield a yamazakipan product image URL built from the current page URL.

    The trailing ".../NN/xxx.html" segment of response.url is replaced with the
    relative image src found on the page.
    """
    img_urls = response.xpath(
        '//div[@class="img-package"]/img/@src').extract()
    res_url = response.url
    pattern = re.findall(r"0\d\/(.*\.html)", res_url)
    # Guard: missing image or a non-matching URL used to raise IndexError on [0].
    if not img_urls or not pattern:
        return
    # Debug print(url) removed.
    url = res_url.replace(pattern[0], img_urls[0])
    item = ImgsItem()
    item["image_urls"] = [url]
    item["category"] = "image_yamazakipan"
    yield item
def parse_content(self, response):
    """Yield the BigImage of a dccoffee product page, categorised by breadcrumb.

    Both branches of the original if/else appended the same suffix, so the
    branch is collapsed into a single unconditional "/"-strip.
    """
    category = response.xpath(
        '//*[@id="contents"]/div[1]/div/ol/li[2]/a/text()').extract()
    img_urls = response.xpath('//img[@class="BigImage "]/@src').extract()
    # Guard: missing image or breadcrumb used to raise IndexError on [0].
    if not img_urls or not category:
        return
    item = ImgsItem()
    item["image_urls"] = ["http:" + img_urls[0]]
    # Strip "/" so the category is safe to use as a path component.
    item["category"] = category[0].replace("/", "") + 'image_dccoffee'
    yield item
def parse_content(self, response):
    """Yield the BigImage of a coffeefriend page, categorised from request meta.

    Both branches of the original if/else appended the same suffix, so the
    branch is collapsed into a single unconditional "/"-strip.
    """
    category = response.meta['category']
    img_urls = response.xpath('//img[@class="BigImage "]/@src').extract()
    # Guard: a page without the expected <img> used to raise IndexError on [0].
    if not img_urls:
        return
    item = ImgsItem()
    item["image_urls"] = ["http:" + img_urls[0]]
    # NOTE(review): category[0] takes the first element of the meta value — if
    # meta carries a plain string this is its first character; confirm against
    # the requesting callback.
    name = category[0]
    # Strip "/" so the category is safe to use as a path component.
    item["category"] = name.replace("/", "") + "image_coffeefriend"
    yield item
def parse(self, response):
    """Yield a "!p4"-suffixed base URL for every entry of the JSON "data" list.

    The category is carried through request meta by the listing callback.
    """
    category = response.meta['category']
    payload = demjson.decode(response.text)
    for entry in payload["data"]:
        base = entry["url"]["baseUrl"]
        item = ImgsItem()
        item["category"] = category
        item["image_urls"] = [base + "!p4"]
        yield item