コード例 #1
0
from scrapy.spider import BaseSpider
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from openrecipes.items import RecipeItem
from spider_egg import SpiderEgg

# Ask SpiderEgg for the "Crawl Spider" variant of the "101cookbooks" source.
# The result is used below as a base class and its __name__ is read, so
# hatch() has to return a class.
crawler = SpiderEgg.hatch("101cookbooks", "Crawl Spider")

# Build the concrete spider class dynamically: it reuses the hatched class's
# name, inherits from both the hatched class and CrawlSpider, and attaches
# the link-following rules as a class attribute.
crawlerClass = type(crawler.__name__, (crawler, CrawlSpider), {
    'rules': (
        # Raw string so the regex escape `\.` is never interpreted as a
        # string escape; the pattern value itself is unchanged. It is passed
        # as a plain string (the original parentheses did not make a tuple).
        # NOTE(review): 'parse_item' is presumably defined on the hatched
        # class -- confirm against SpiderEgg.
        Rule(SgmlLinkExtractor(allow=r'archives/.+\.html'),
             callback='parse_item'),
    ),
})
コード例 #2
0
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.selector import HtmlXPathSelector, XmlXPathSelector
from openrecipes.items import RecipeItem
from spider_egg import SpiderEgg

# Ask SpiderEgg for the "Feed Spider" variant of the "101cookbooks" source.
# The result is used below as the sole base class, so hatch() has to return
# a class.
crawler = SpiderEgg.hatch("101cookbooks", "Feed Spider")

# Create a same-named subclass of the hatched class with no extra attributes.
crawlerClass = type(crawler.__name__, (crawler,), {})

# Show the `section` class attribute inherited from the hatched class.
# The parenthesized single-argument form prints identically under the
# Python 2 print statement and is also valid as the Python 3 print function.
print(crawlerClass.section)