def execute(argv=None):
    '''Entry point of the `crawlmi` command-line tool.

    Resolves the command named in `argv`, parses its options, builds and
    sets up the engine, optionally writes a pidfile and finally runs the
    command.

    `argv` defaults to `sys.argv`. The list is handed to
    `pop_command_name`, which presumably removes the command name from it
    (verify against that helper); the items after the first one are then
    parsed as the command's options and arguments.

    Calls `sys.exit(0)` after listing the commands when no command name is
    given, and `sys.exit(2)` when the command name is unknown.
    '''
    if argv is None:
        argv = sys.argv

    project = Project()
    settings = EngineSettings(module_settings=project.module_settings)
    inside_project = project.inside_project
    cmds = get_commands(settings, inside_project)

    cmd_name = pop_command_name(argv)
    if not cmd_name:
        print_commands(cmds, inside_project)
        sys.exit(0)
    elif cmd_name not in cmds:
        print_unknown_command(cmd_name, inside_project)
        sys.exit(2)

    # initialize the command
    cmd = cmds[cmd_name]()

    # initialize parser
    parser = optparse.OptionParser(
        formatter=optparse.TitledHelpFormatter(),
        conflict_handler='resolve')
    parser.usage = 'crawlmi %s %s' % (cmd_name, cmd.syntax())
    parser.description = cmd.short_desc()
    cmd.add_options(parser)
    options, args = parser.parse_args(args=argv[1:])

    # initialize custom settings
    custom_settings = run_print_help(parser, cmd.get_settings, args, options)
    settings.custom_settings = custom_settings

    # initialize engine
    engine = Engine(settings, project, command_invoked=cmd_name)
    spider = run_print_help(parser, cmd.get_spider, engine, args, options)
    engine.set_spider(spider)
    # set project's data dir. It has to be done when all the settings are
    # known.
    project.set_data_dir(engine.settings.get('DATA_DIR'))
    engine.setup()
    cmd.set_engine(engine)

    # save pidfile
    pidfile = getattr(options, 'pidfile', None)
    if pidfile:
        # The file is opened in binary mode, so the payload has to be bytes;
        # writing a text string here raises TypeError on Python 3.
        pid_line = (str(os.getpid()) + os.linesep).encode('ascii')
        with open(pidfile, 'wb') as f:
            f.write(pid_line)

    # run command
    run_print_help(parser, cmd.run, args, options)
def get_engine(custom_settings=None, **kwargs):
    '''Return the engine initialized with the custom settings.

    `custom_settings` is an optional dict of settings; any extra keyword
    arguments are merged on top of it and take precedence. The passed dict
    is copied, never modified.

    The returned engine has a dummy spider set, `stop_if_idle` disabled and
    its `stats` and `signals` attributes already assigned.
    '''
    # Merge into a fresh dict so that the caller's dict is not mutated by the
    # keyword overrides.
    merged_settings = dict(custom_settings or {})
    merged_settings.update(kwargs)

    settings = EngineSettings(custom_settings=merged_settings)
    engine = Engine(settings, Project(path=None), clock=Clock())
    engine.set_spider(BaseSpider('dummy'))
    engine.stop_if_idle = False
    # it is common to use stats and signals in unittests, without full
    # initialization of the engine
    engine.stats = MemoryStats(engine)
    engine.signals = SignalManager(engine)
    return engine