def test_get_crawl_args(self):
    """get_crawl_args puts the spider name first, then '-a key=value' pairs.

    Also verifies every produced argument is a native str.
    """
    message = {'_project': 'lolo', '_spider': 'lala'}
    self.assertEqual(get_crawl_args(message), ['lala'])

    message = {'_project': 'lolo', '_spider': 'lala', 'arg1': u'val1'}
    result = get_crawl_args(message)
    self.assertEqual(result, ['lala', '-a', 'arg1=val1'])
    for part in result:
        assert isinstance(part, str), result
def _spawn_process(self, message, slot):
    """Spawn a crawl subprocess for *message* and register it under *slot*.

    If the message carries ``file_settings`` (source text of a settings
    module), it is written to a temporary ``.py`` file; the child process
    finds it via PYTHONPATH and SCRAPY_SETTINGS_MODULE_TO_OVERRIDE.
    """
    msg = native_stringify_dict(message, keys_only=False)
    file_settings = msg.pop('file_settings', None)
    project = msg['_project']
    args = [sys.executable, '-m', self.runner, 'crawl']
    args += get_crawl_args(msg)
    e = self.app.getComponent(IEnvironment)
    env = e.get_environment(msg, slot)
    env = native_stringify_dict(env, keys_only=False)
    tmpfile = None
    if file_settings:
        # delete=False: the file must outlive this context manager so the
        # child can import it; its path is handed to the protocol (tmpfile)
        # for later cleanup.
        with NamedTemporaryFile('w', encoding='utf-8', suffix='.py',
                                delete=False) as tmp:
            tmp.write(file_settings)
            path, name_file = os.path.split(tmp.name)
            module = os.path.splitext(name_file)[0]
            # Bug fix: join with os.pathsep (':' on POSIX, ';' on Windows)
            # instead of a hard-coded ':' when extending PYTHONPATH.
            existing = os.environ.get('PYTHONPATH')
            if existing:
                env['PYTHONPATH'] = '{}{}{}'.format(path, os.pathsep, existing)
            else:
                env['PYTHONPATH'] = path
            env['SCRAPY_SETTINGS_MODULE_TO_OVERRIDE'] = module
            tmpfile = tmp.name
    pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                               msg['_job'], env, tmpfile)
    pp.deferred.addBoth(self._process_finished, slot)
    reactor.spawnProcess(pp, sys.executable, args=args, env=env)
    self.processes[slot] = pp
def crawl(self, args):
    """Run ``scrapy crawl`` as a subprocess and return its raw stdout.

    *args* is a dict expected to hold 'spider' and 'project' keys (plus
    any extra spider arguments understood by get_crawl_args). The crawl
    output is exported as JSON to stdout ('-o -') and returned as bytes.
    """
    env = os.environ.copy()
    # Bug fix: work on a copy so the caller's dict is not mutated as a
    # side effect of injecting the '_spider'/'_project' keys.
    params = dict(args)
    params['_spider'] = params.get('spider')
    params['_project'] = params.get('project')
    cargs = get_crawl_args(params)
    cmd = ['scrapy', 'crawl'] + cargs + ['-t', 'json', '-o', '-']
    process = Popen(cmd, stdin=PIPE, stdout=PIPE, env=env)
    out, _ = process.communicate()
    return out
def test_get_crawl_args_with_settings(self):
    """A 'settings' dict in the message yields '-s KEY=value' after the
    '-a' spider arguments, and every argument is a native str."""
    message = {
        '_project': 'lolo',
        '_spider': 'lala',
        'arg1': u'val1',
        'settings': {'ONE': 'two'},
    }
    result = get_crawl_args(message)
    expected = ['lala', '-a', 'arg1=val1', '-s', 'ONE=two']
    self.assertEqual(result, expected)
    assert all(isinstance(part, str) for part in result), result
def _spawn_process(self, message, slot):
    """Launch the crawl subprocess for *message* and track it under *slot*."""
    msg = native_stringify_dict(message, keys_only=False)
    project = msg['_project']
    cmd = [sys.executable, '-m', self.runner, 'crawl'] + get_crawl_args(msg)
    env_provider = self.app.getComponent(IEnvironment)
    environment = native_stringify_dict(
        env_provider.get_environment(msg, slot), keys_only=False)
    protocol = ScrapyProcessProtocol(
        slot, project, msg['_spider'], msg['_job'], environment)
    protocol.deferred.addBoth(self._process_finished, slot)
    reactor.spawnProcess(protocol, sys.executable, args=cmd, env=environment)
    self.processes[slot] = protocol
def _spawn_process(self, message, slot):
    """Start a crawl subprocess for *message*, recording it in *slot*."""
    msg = stringify_dict(message, keys_only=False)
    project = msg['_project']
    command = [sys.executable, '-m', self.runner, 'crawl']
    command.extend(get_crawl_args(msg))
    environment_component = self.app.getComponent(IEnvironment)
    environment = stringify_dict(
        environment_component.get_environment(msg, slot), keys_only=False)
    protocol = ScrapyProcessProtocol(
        slot, project, msg['_spider'], msg['_job'], environment)
    protocol.deferred.addBoth(self._process_finished, slot)
    reactor.spawnProcess(protocol, sys.executable, args=command, env=environment)
    self.processes[slot] = protocol
def _spawn_process(self, message, slot):
    """Spawn a crawl subprocess, forwarding the per-crawl parameters
    (start_url, currency, country, site_id) to the process protocol."""
    msg = stringify_dict(message, keys_only=False)
    project = msg['_project']
    command = [sys.executable, '-m', self.runner, 'crawl']
    command += get_crawl_args(msg)
    env_component = self.app.getComponent(IEnvironment)
    environment = stringify_dict(
        env_component.get_environment(msg, slot), keys_only=False)
    # Extra crawl metadata is handed straight through to the protocol.
    protocol = ScrapyProcessProtocol(
        slot, project, msg['_spider'], msg['_job'], environment,
        msg['start_url'], msg['currency'], msg['country'], msg['site_id'])
    protocol.deferred.addBoth(self._process_finished, slot)
    reactor.spawnProcess(protocol, sys.executable, args=command, env=environment)
    self.processes[slot] = protocol
def _spawn_process(self, message, slot):
    """Spawn a crawl subprocess for *message* in *slot*, logging startup.

    Logs the scrapyd version, process name, and environment before the
    process is handed to the reactor.
    """
    msg = native_stringify_dict(message, keys_only=False)
    project = msg['_project']
    args = [sys.executable, '-m', self.runner, 'crawl']
    args += get_crawl_args(msg)
    e = self.app.getComponent(IEnvironment)
    env = e.get_environment(msg, slot)
    env = native_stringify_dict(env, keys_only=False)
    # Bug fix: the format string referenced %(_name)r but the keyword
    # argument supplied is ``name`` — Twisted renders the event with
    # ``format % eventDict``, so the mismatched key raised KeyError.
    log.msg(format='Scrapyd %(version)s started: name=%(name)r, env=%(env)r',
            version=__version__, name=env.get('_name', ''), env=env)
    pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                               msg['_job'], env)
    pp.deferred.addBoth(self._process_finished, slot)
    reactor.spawnProcess(pp, sys.executable, args=args, env=env)
    self.processes[slot] = pp
def test_get_crawl_args_with_settings(self):
    """Spider settings are appended as '-s KEY=value' pairs, all str."""
    msg = {'_project': 'lolo',
           '_spider': 'lala',
           'arg1': u'val1',
           'settings': {'ONE': 'two'}}
    cargs = get_crawl_args(msg)
    self.assertEqual(
        cargs, ['lala', '-a', 'arg1=val1', '-s', 'ONE=two'])
    non_str = [x for x in cargs if not isinstance(x, str)]
    assert not non_str, cargs