def run(self, args, opts):
    # Pass the custom options through to the spiders as spider arguments.
    opts.spargs = {}
    opts.spargs['use_proxy'] = opts.use_proxy
    opts.spargs['require_js'] = opts.require_js
    opts.spargs['keep_old_data'] = opts.keep_old_data
    opts.spargs['crawl_all_cinemas'] = opts.crawl_all_cinemas
    opts.spargs['crawl_all_movies'] = opts.crawl_all_movies
    opts.spargs['crawl_booking_data'] = opts.crawl_booking_data
    opts.spargs['movie_list'] = opts.movie_list
    opts.spargs['cinema_list'] = opts.cinema_list
    opts.spargs['date'] = opts.date
    if opts.all_showing:
        self.run_multiple_spiders(args, opts)
    else:
        Command.run(self, args, opts)
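run_multiple_spiders is referenced above but not shown in this snippet. A minimal sketch of what it could look like, assuming a Scrapy version where commands carry a crawler_process with a spider_loader; how the "showing" spiders are selected is a guess:

def run_multiple_spiders(self, args, opts):
    # Hypothetical sketch: schedule every spider in the project with the
    # shared spider arguments, then start the reactor once for all of them.
    # A real implementation would likely filter for the "showing" spiders.
    for spider_name in self.crawler_process.spider_loader.list():
        self.crawler_process.crawl(spider_name, **opts.spargs)
    self.crawler_process.start()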
def add_options(self, parser):
    Command.add_options(self, parser)
    # Custom options specific to this project's spiders.
    group = OptionGroup(parser, "Custom Options")
    group.add_option("--all_showing", action="store_true", default=False,
                     help="run all showing spiders")
    group.add_option("--use_proxy", action="store_true", default=False,
                     help="use the proxy from settings when crawling")
    group.add_option("--require_js", action="store_true", default=False,
                     help="use PhantomJS to render pages")
    group.add_option("--keep_old_data", action="store_true", default=False,
                     help="keep old data when crawling")
    group.add_option("--crawl_all_cinemas", action="store_true", default=False,
                     help="crawl all cinemas")
    group.add_option("--crawl_all_movies", action="store_true", default=False,
                     help="crawl all movies")
    group.add_option("--crawl_booking_data", action="store_true", default=False,
                     help="crawl booking data for each crawled showing")
    group.add_option("--movie_list", action="append", default=[],
                     metavar="moviename",
                     help="movies to crawl, default is 君の名は。")
    group.add_option("--cinema_list", action="append", default=[],
                     metavar="cinemaname", help="cinemas to crawl")
    tomorrow = arrow.now('UTC+9').shift(days=+1)
    group.add_option("--date", default=tomorrow.format('YYYYMMDD'),
                     help="crawl date, default is tomorrow")
    parser.add_option_group(group)
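For reference, once this command is registered through the project's COMMANDS_MODULE setting it is invoked like any other Scrapy command; the command name crawlall and spider name toho below are hypothetical:

    scrapy crawlall toho --crawl_booking_data --date 20170102 --movie_list 君の名は。

Because --movie_list and --cinema_list use action="append", repeating either flag accumulates values into the corresponding list.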
def run_crawler(argv=None, settings=None):
    """Run the Scrapy crawler bound to the registered spiders.

    This function is suitable for standalone scripts. Usage::

        # mimic the 'scrapy crawl' command with these two spiders available
        SpiderManager.register(FooSpider)
        SpiderManager.register(BarSpider)

        run_crawler()
    """
    argv = argv or sys.argv
    settings = _build_settings(settings)
    # Load the spider manager from this module.
    settings.overrides.update({
        'SPIDER_MANAGER_CLASS': '%s.%s' % (__name__, SpiderManager.__name__),
    })
    crawler = CrawlerProcess(settings)
    crawler.install()
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter())
    parser.add_option('-l', '--list', action='store_true',
                      help="List available spiders")
    cmd = CrawlCommand()
    settings.defaults.update(cmd.default_settings)
    cmd.settings = settings
    cmd.add_options(parser)
    parser.usage = "%s %s" % (argv[0], cmd.syntax())
    opts, args = parser.parse_args()
    if opts.list:
        settings.defaults.update(ListCommand.default_settings)
        listcmd = ListCommand()
        listcmd.set_crawler(crawler)
        listcmd.run(args, opts)
        sys.exit(listcmd.exitcode)
    else:
        cmdline._run_print_help(parser, cmd.process_options, args, opts)
        cmd.set_crawler(crawler)
        cmdline._run_print_help(parser, cmdline._run_command, cmd, args, opts)
        sys.exit(cmd.exitcode)
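_build_settings is not defined in this snippet. A minimal sketch, under the assumption that it normalizes its argument into the legacy CrawlerSettings object (Scrapy 0.x) whose .defaults and .overrides dicts the function above relies on:

from scrapy.settings import CrawlerSettings

def _build_settings(settings=None):
    # Hypothetical helper: accept a ready-made settings object, a plain
    # dict of defaults, or None (fall back to an empty CrawlerSettings).
    if settings is None:
        return CrawlerSettings()
    if isinstance(settings, dict):
        built = CrawlerSettings()
        built.defaults.update(settings)
        return built
    return settings

Note that settings.overrides/settings.defaults, crawler.install(), and the cmdline._run_* helpers all belong to the pre-1.0 Scrapy API; on current Scrapy this function would instead be built around CrawlerProcess(get_project_settings()).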
def process_options(self, args, opts):
    Command.process_options(self, args, opts)
def run(self, args, opts):
    CrawlCommand.run(self, args, opts)
def process_options(self, args, opts):
    CrawlCommand.process_options(self, args, opts)
    if opts.db:
        # `settings` is a module-level settings object (apparently the
        # legacy scrapy.conf.settings singleton, given the .overrides API).
        settings.overrides['FINANCEDB_ENABLED'] = True
def add_options(self, parser):
    CrawlCommand.add_options(self, parser)
    parser.add_option("--db", action="store_true",
                      help="Save scraped data into application db")
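Usage is the same as the stock crawl command plus the extra flag; the spider name below is a placeholder:

    scrapy crawl stock_spider --db

When --db is passed, process_options above flips FINANCEDB_ENABLED in the global settings before the crawl starts, presumably enabling a pipeline that writes scraped items to the application database only on demand.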