from scrapy import Spider
from scrapy.loader import ItemLoader

from myspider.items import MyItem


class MySpider(Spider):
    """Spider that loads a product name and price into a ``MyItem``."""

    name = 'myspider'
    start_urls = ['http://example.com']

    def parse(self, response):
        """Build one ``MyItem`` from *response* using XPath and yield it.

        The ``name`` and ``price`` fields are filled from the text nodes of
        the ``product-name`` and ``product-price`` ``<div>`` elements.
        """
        # (field, xpath) pairs, applied in this order.
        field_xpaths = (
            ('name', '//div[@class="product-name"]/text()'),
            ('price', '//div[@class="product-price"]/text()'),
        )

        item_loader = ItemLoader(item=MyItem(), response=response)
        for field, xpath in field_xpaths:
            item_loader.add_xpath(field, xpath)

        yield item_loader.load_item()
from datetime import datetime

from scrapy import Spider
from scrapy.loader import ItemLoader

from myspider.items import MyItem


class MySpider(Spider):
    """Spider that loads product data plus fixed metadata into a ``MyItem``."""

    name = 'myspider'
    start_urls = ['http://example.com']

    def parse(self, response):
        """Build one ``MyItem`` from *response* and yield it.

        ``name`` and ``price`` are extracted from the page with XPath;
        ``currency``, ``timestamp`` and ``source`` are supplied directly
        with ``add_value``.
        """
        # NOTE(review): assumes MyItem declares all five fields used below;
        # confirm against myspider.items.
        loader = ItemLoader(item=MyItem(), response=response)

        # Values scraped from the page.
        loader.add_xpath('name', '//div[@class="product-name"]/text()')
        loader.add_xpath('price', '//div[@class="product-price"]/text()')

        # Values not present on the page: hard-coded or computed at crawl time.
        loader.add_value('currency', 'USD')
        # Requires the ``datetime`` import above; yields a naive local time.
        loader.add_value('timestamp', datetime.now())
        loader.add_value('source', 'example.com')

        yield loader.load_item()


# In this example, we start by importing the necessary classes from Scrapy.
# We then define a Spider class that uses ItemLoader to load data from a
# website. We also use the add_value method to add additional data fields to
# our Item object, including the currency, timestamp, and source of the data.
# These values can be hard-coded or generated dynamically using Python code.
#
# In conclusion, the Python scrapy.loader ItemLoader module is a powerful tool
# for loading and processing data in Scrapy spiders. It offers a flexible and
# streamlined way to extract data from web pages, and can also be used to
# clean and transform data before it is stored in Items.