def __init__(self, username, password, ajaxcount=100):
    """Store login credentials and the AJAX page size on the instance.

    Args:
        username: account name used to log in.
        password: account password.
        ajaxcount: number of items requested per AJAX call (default 100).
    """
    self.username = username
    self.password = password
    self.ajaxcount = ajaxcount
    # Legacy Scrapy/Python-2 workaround: drop the optional 'boto' feature
    # so Scrapy never tries to import boto.
    # BUG FIX: the original called remove() unconditionally, which raises
    # ValueError the second time an instance is constructed (the feature is
    # already gone from the list). Guard with a membership check.
    from scrapy import optional_features
    if 'boto' in optional_features:
        optional_features.remove('boto')
def __init__(self, username, password, ajaxcount=100):
    """Store login credentials and the AJAX page size on the instance.

    Args:
        username: account name used to log in.
        password: account password.
        ajaxcount: number of items requested per AJAX call (default 100).
    """
    self.username = username
    self.password = password
    self.ajaxcount = ajaxcount
    # Legacy Scrapy/Python-2 workaround: drop the optional 'boto' feature
    # so Scrapy never tries to import boto.
    # BUG FIX: the original called remove() unconditionally, which raises
    # ValueError the second time an instance is constructed (the feature is
    # already gone from the list). Guard with a membership check.
    from scrapy import optional_features
    if 'boto' in optional_features:
        optional_features.remove('boto')
# -*- coding: utf-8 -*-
"""Runner script: launch N WishRatingSpider instances fed from a Redis key.

Usage: python <script> <username> <password> <num_spider> <redis_key>
"""
import sys

from scrapy.crawler import CrawlerProcess

from spiders.wish_rating import WishRatingSpider

if __name__ == '__main__':
    # Legacy Scrapy/Python-2 workaround: drop the optional 'boto' feature
    # so Scrapy never tries to import boto. Guarded so re-running this
    # setup (or another module doing the same) cannot raise ValueError.
    from scrapy import optional_features
    if 'boto' in optional_features:
        optional_features.remove('boto')

    # Positional CLI arguments; int() on argv[3] raises ValueError early
    # if the spider count is not numeric.
    username = sys.argv[1]
    password = sys.argv[2]
    num_spider = int(sys.argv[3])
    redis_key = sys.argv[4]

    process = CrawlerProcess({
        'ITEM_PIPELINES': {
            'pipelines.WfchPipeline': 300,
        },
        'LOG_LEVEL': 'INFO',
        'LOG_FILE': 'target/log.rating',
    })
    # Schedule num_spider identical crawlers sharing one Redis work queue.
    # (xrange kept: this file targets Python 2, as scrapy.optional_features
    # only existed in Python-2-era Scrapy.)
    for _ in xrange(num_spider):
        process.crawl(WishRatingSpider, username=username,
                      password=password, redis_key=redis_key)
    # BUG FIX: the original only scheduled the crawls and never started the
    # reactor, so no spider ever ran (the sibling store_feedback runner
    # calls start()). Blocks until all crawlers finish.
    process.start()
# -*- coding: utf-8 -*-
"""Runner script: crawl store feedback and push items to Redis and MongoDB."""
from scrapy import optional_features
from scrapy.crawler import CrawlerProcess

from spiders.store_feedback import StoreFeedbackSpider

# Legacy Scrapy/Python-2 workaround: drop the optional 'boto' feature so
# Scrapy never tries to import boto.
optional_features.remove('boto')

# Crawler configuration: telnet console and cookies disabled; item pipeline
# chain de-duplicates first, then pushes to Redis and MongoDB.
settings = {
    'TELNETCONSOLE_ENABLED': False,
    'COOKIES_ENABLED': False,
    'ITEM_PIPELINES': {
        'pipelines.DuplicatePipeline': 200,
        'pipelines.ToRedisPipeline': 300,
        'pipelines.ToMongoPipeline': 400,
    },
    'LOG_LEVEL': 'INFO',
    'prefix': 'test',
    'base_url': '',
}

# first step
process = CrawlerProcess(settings)
process.crawl(StoreFeedbackSpider)
process.start()