Example #1
0
 def test_get_crawl_args(self):
     """get_crawl_args: spider name alone, then spider plus one -a argument."""
     bare_msg = {'_project': 'lolo', '_spider': 'lala'}
     self.assertEqual(get_crawl_args(bare_msg), ['lala'])
     msg_with_arg = {'_project': 'lolo', '_spider': 'lala', 'arg1': u'val1'}
     result = get_crawl_args(msg_with_arg)
     self.assertEqual(result, ['lala', '-a', 'arg1=val1'])
     # every produced token must be a native str
     for token in result:
         assert isinstance(token, str), result
Example #2
0
 def test_get_crawl_args(self):
     """Spider-only message yields just the spider name; extra keys become -a pairs."""
     self.assertEqual(
         get_crawl_args({'_project': 'lolo', '_spider': 'lala'}),
         ['lala'])
     cargs = get_crawl_args(
         {'_project': 'lolo', '_spider': 'lala', 'arg1': u'val1'})
     self.assertEqual(cargs, ['lala', '-a', 'arg1=val1'])
     assert all(isinstance(item, str) for item in cargs), cargs
Example #3
0
 def _spawn_process(self, message, slot):
     """Spawn a crawler subprocess for *message* in scheduler *slot*.

     Builds the ``python -m <runner> crawl ...`` command line from the
     message, derives the child environment from the IEnvironment
     component, and — if the message carries ``file_settings`` — writes
     those settings to a temporary module the child imports via
     PYTHONPATH / SCRAPY_SETTINGS_MODULE_TO_OVERRIDE.
     """
     msg = native_stringify_dict(message, keys_only=False)
     file_settings = msg.pop('file_settings', None)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl']
     args += get_crawl_args(msg)
     e = self.app.getComponent(IEnvironment)
     env = e.get_environment(msg, slot)
     env = native_stringify_dict(env, keys_only=False)
     tmpfile = None
     if file_settings:
         # Persist the overriding settings as an importable .py module.
         # delete=False: the child must still see the file; tmpfile is
         # handed to ScrapyProcessProtocol, presumably for later cleanup.
         with NamedTemporaryFile('w',
                                 encoding='utf-8',
                                 suffix='.py',
                                 delete=False) as tmp:
             tmp.write(file_settings)
         path, name_file = os.path.split(tmp.name)
         module = os.path.splitext(name_file)[0]
         # FIX: use os.pathsep (':' on POSIX, ';' on Windows) rather than
         # a hard-coded ':' so the child can import the module on any OS.
         existing = os.environ.get('PYTHONPATH')
         env['PYTHONPATH'] = '{}{}{}'.format(path, os.pathsep, existing) \
             if existing else path
         env['SCRAPY_SETTINGS_MODULE_TO_OVERRIDE'] = module
         tmpfile = tmp.name
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                                msg['_job'], env, tmpfile)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
Example #4
0
 def crawl(self, args):
     """Run ``scrapy crawl`` as a subprocess and return its raw stdout.

     The spider/project entries of *args* are copied into the underscore
     keys that get_crawl_args expects; output is requested as JSON on
     stdout via ``-t json -o -``.
     """
     env = os.environ.copy()
     args['_spider'] = args.get('spider')
     args['_project'] = args.get('project')
     command = ['scrapy', 'crawl']
     command += get_crawl_args(args)
     command += ['-t', 'json', '-o', '-']
     proc = Popen(command, stdin=PIPE, stdout=PIPE, env=env)
     stdout_data, _ = proc.communicate()
     return stdout_data
Example #5
0
 def test_get_crawl_args_with_settings(self):
     """A 'settings' dict in the message is rendered as -s KEY=value pairs."""
     message = {'_project': 'lolo',
                '_spider': 'lala',
                'arg1': u'val1',
                'settings': {'ONE': 'two'}}
     result = get_crawl_args(message)
     expected = ['lala', '-a', 'arg1=val1', '-s', 'ONE=two']
     self.assertEqual(result, expected)
     for piece in result:
         assert isinstance(piece, str), result
Example #6
0
 def _spawn_process(self, message, slot):
     """Launch a crawl subprocess for *message* and register it in *slot*."""
     msg = native_stringify_dict(message, keys_only=False)
     project = msg['_project']
     # Command line: python -m <runner> crawl <spider> [-a k=v ...]
     cmd = [sys.executable, '-m', self.runner, 'crawl']
     cmd.extend(get_crawl_args(msg))
     # Child environment comes from the IEnvironment component.
     env_provider = self.app.getComponent(IEnvironment)
     env = native_stringify_dict(
         env_provider.get_environment(msg, slot), keys_only=False)
     protocol = ScrapyProcessProtocol(
         slot, project, msg['_spider'], msg['_job'], env)
     protocol.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(protocol, sys.executable, args=cmd, env=env)
     self.processes[slot] = protocol
Example #7
0
 def _spawn_process(self, message, slot):
     """Start a crawler child process for *message*, tracked under *slot*."""
     msg = stringify_dict(message, keys_only=False)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl'] + get_crawl_args(msg)
     # Resolve the environment for the child via the IEnvironment component.
     component = self.app.getComponent(IEnvironment)
     env = stringify_dict(component.get_environment(msg, slot),
                          keys_only=False)
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'], msg['_job'],
                                env)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
Example #8
0
 def _spawn_process(self, message, slot):
     """Spawn a crawl subprocess, forwarding per-site fields to the protocol.

     NOTE(review): assumes the message always carries start_url, currency,
     country and site_id — a KeyError is raised otherwise; confirm with
     the scheduler that enqueues these messages.
     """
     msg = stringify_dict(message, keys_only=False)
     project = msg['_project']
     cmdline = [sys.executable, '-m', self.runner, 'crawl']
     cmdline.extend(get_crawl_args(msg))
     env_component = self.app.getComponent(IEnvironment)
     env = stringify_dict(env_component.get_environment(msg, slot),
                          keys_only=False)
     # Pass the start_url (and related site metadata) through to the protocol.
     pp = ScrapyProcessProtocol(
         slot, project, msg['_spider'], msg['_job'], env,
         msg['start_url'], msg['currency'], msg['country'], msg['site_id'])
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=cmdline, env=env)
     self.processes[slot] = pp
Example #9
0
 def _spawn_process(self, message, slot):
     """Spawn a crawler subprocess for *message* in *slot*, logging the launch.

     Builds the runner command line, resolves the child environment via
     the IEnvironment component, and wires a ScrapyProcessProtocol whose
     deferred fires _process_finished when the child exits.
     """
     msg = native_stringify_dict(message, keys_only=False)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl']
     args += get_crawl_args(msg)
     e = self.app.getComponent(IEnvironment)
     env = e.get_environment(msg, slot)
     env = native_stringify_dict(env, keys_only=False)
     # FIX: the format string referenced %(_name)r but the keyword passed
     # to log.msg is `name`, so Twisted could not interpolate the event
     # and the message was emitted as a formatting error instead.
     log.msg(format='Scrapyd %(version)s started: name=%(name)r, env=%(env)r',
             version=__version__, name=env.get('_name',''), env=env)
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                                msg['_job'], env)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
Example #10
0
 def test_get_crawl_args_with_settings(self):
     """Settings entries are emitted after spider args as -s KEY=value."""
     payload = {'_project': 'lolo', '_spider': 'lala', 'arg1': u'val1',
                'settings': {'ONE': 'two'}}
     observed = get_crawl_args(payload)
     self.assertEqual(observed,
                      ['lala', '-a', 'arg1=val1', '-s', 'ONE=two'])
     # every element must already be a native str
     assert all(isinstance(elem, str) for elem in observed), observed