class EggStorageTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``.

    One test passes an egg file path as the third argument, the other passes
    ``None`` in its place.
    """

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """With an egg file: SCRAPY_EGGFILE is set, no settings module."""
        msg = {'project': 'mybot'}
        slot = 3
        env = self.environ.get_environment(msg, slot, '/path/to/file.egg')
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith('slot3.log'))
        self.assertTrue(env['SCRAPY_EGGFILE'].endswith('/path/to/file.egg'))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_without_eggfile(self):
        """Without an egg file: the configured settings module is exported."""
        msg = {'project': 'newbot'}
        slot = 3
        env = self.environ.get_environment(msg, slot, None)
        self.assertEqual(env['SCRAPY_PROJECT'], 'newbot')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('newbot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith('slot3.log'))
        self.assertEqual(env['SCRAPY_SETTINGS_MODULE'], 'newbot.settings')
        self.assertNotIn('SCRAPY_EGGFILE', env)
def setUp(self):
    """Prepare ``self.environ`` backed by a freshly created directory.

    The same directory is used for both ``eggs_dir`` and ``logs_dir``, and
    the [settings] section maps 'newbot' to 'newbot.settings'.
    """
    work_dir = self.mktemp()
    os.mkdir(work_dir)

    dirs = {'eggs_dir': work_dir, 'logs_dir': work_dir}
    cfg = Config(values=dirs)

    parser = cfg.cp
    parser.add_section('settings')
    parser.set('settings', 'newbot', 'newbot.settings')

    self.environ = Environment(cfg, initenv={})
def test_get_environment_with_logfile(self):
    """SCRAPY_LOG_FILE is produced when a ``logs_filename`` template is set."""
    config = Config(values={
        'items_dir': '',
        'logs_dir': '.',
        'logs_filename': '{project}-{spider}-{Y}{m}{d}T{H}{M}{S}',
    })
    msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
    slot = 3
    environ = Environment(config, initenv={})
    now = datetime.datetime.now()
    env = environ.get_environment(msg, slot)
    expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
    # NOTE(review): the second argument of assertTrue is only the failure
    # message, so this verifies that SCRAPY_LOG_FILE is truthy and never
    # compares it with expected_logfilename. The expected text also says
    # 'spider' while the message carries 'myspider' - confirm the real
    # output of Environment before turning this into a strict comparison.
    self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
def test_get_environment_with_no_items_dir(self):
    """Empty ``items_dir`` / ``logs_dir`` must not yield feed or log entries."""
    config = Config(values={'items_dir': '', 'logs_dir': ''})
    config.cp.add_section('settings')
    config.cp.set('settings', 'newbot', 'newbot.settings')
    msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
    slot = 3
    environ = Environment(config, initenv={})
    env = environ.get_environment(msg, slot)
    # assertNotIn reports the offending key and mapping on failure.
    self.assertNotIn('SCRAPY_FEED_URI', env)
    self.assertNotIn('SCRAPY_LOG_FILE', env)
def test_get_environment_with_logfile(self):
    """SCRAPY_LOG_FILE is produced when a ``logs_filename`` template is set."""
    config = Config(
        values={
            'items_dir': '',
            'logs_dir': '.',
            'logs_filename': '{project}-{spider}-{job}-{Y}{m}{d}T{H}{M}{S}'
        })
    msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
    slot = 3
    environ = Environment(config, initenv={})
    now = datetime.datetime.now()
    env = environ.get_environment(msg, slot)
    expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
    # NOTE(review): the second argument of assertTrue is only the failure
    # message, so this verifies that SCRAPY_LOG_FILE is truthy and never
    # compares it with expected_logfilename. The expected text also lacks
    # the job id that the template includes - confirm the real output of
    # Environment before turning this into a strict comparison.
    self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider and job are exported with matching paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        # The integer slot is expected back as a string.
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
            os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider, job and concurrency values are exported."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        # The integer slot is expected back as a string.
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertEqual(env['SCRAPY_CONCURRENT_SPIDERS'], '1')
        self.assertTrue(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)
def setUp(self):
    """Prepare ``self.environ`` backed by a freshly created directory.

    The same directory is used for both ``eggs_dir`` and ``logs_dir``, and
    the [settings] section maps "newbot" to "newbot.settings".
    """
    scratch = self.mktemp()
    os.mkdir(scratch)

    cfg = Config(values={"eggs_dir": scratch, "logs_dir": scratch})
    section, project, module = "settings", "newbot", "newbot.settings"
    cfg.cp.add_section(section)
    cfg.cp.set(section, project, module)

    self.environ = Environment(cfg, initenv={})
def get_application(config):
    """Assemble and return the Scrapyd ``Application``.

    Reads ``http_port``, ``bind_address``, ``poll_interval`` and ``launcher``
    from *config*, registers the poller, egg storage, scheduler and
    environment as components of the application, and attaches the launcher,
    the polling timer and the web service to it.

    :param config: configuration object providing ``get``, ``getint`` and
        ``getfloat``.
    :return: the populated application object.
    """
    app = Application('Scrapyd')
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)

    app.setComponent(IPoller, poller)
    app.setComponent(IEggStorage, eggstorage)
    app.setComponent(ISpiderScheduler, scheduler)
    app.setComponent(IEnvironment, environment)

    # The launcher class is resolved from its dotted path at runtime.
    laupath = config.get('launcher', 'scrapyd_mongodb.launcher.Launcher')
    laucls = load_object(laupath)
    launcher = laucls(config, app)

    timer = TimerService(poll_interval, poller.poll)
    webservice = TCPServer(http_port, server.Site(Root(config, app)),
                           interface=bind_address)
    # ``format`` must be a plain string: a trailing comma inside the
    # parentheses would turn it into a 1-tuple that cannot be %-formatted
    # with the keyword arguments below.
    log.msg(format=('Scrapyd web console available at '
                    'http://%(bind_address)s:%(http_port)s/'),
            bind_address=bind_address, http_port=http_port)

    launcher.setServiceParent(app)
    timer.setServiceParent(app)
    webservice.setServiceParent(app)

    return app
def application(config):
    """Assemble the Scrapyd ``Application`` using the psycopg2-backed parts.

    The listening port comes from the ``PORT`` environment variable when it
    is set, otherwise from the ``http_port`` config value (default 6800).
    ``DATABASE_URL`` from the environment is copied into the ``scrapyd``
    config section before the poller and scheduler are created.

    :param config: configuration object; ``config.cp`` is written to.
    :return: the populated application object.
    """
    app = Application("Scrapyd")

    configured_port = config.getint('http_port', 6800)
    http_port = int(environ.get('PORT', configured_port))

    # NOTE(review): environ.get returns None when DATABASE_URL is unset;
    # confirm the config parser accepts a None value here.
    database_url = environ.get('DATABASE_URL')
    config.cp.set('scrapyd', 'database_url', database_url)

    poller = Psycopg2QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = Psycopg2SpiderScheduler(config)
    environment = Environment(config)

    components = (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
    )
    for interface, component in components:
        app.setComponent(interface, component)

    launcher = Launcher(config, app)
    timer = TimerService(5, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(http_port, site)

    message = "Scrapyd web console available at http://localhost:%s/ (HEROKU)" % http_port
    log.msg(message)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider and job are exported with matching paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        # The integer slot is expected back as a string.
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].startswith(
                'file://{}'.format(os.getcwd())))
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
                os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir`` / ``logs_dir`` must not yield feed or log entries."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)

    def test_get_environment_with_logfile(self):
        """SCRAPY_LOG_FILE is produced when a ``logs_filename`` template is set."""
        config = Config(
            values={
                'items_dir': '',
                'logs_dir': '.',
                'logs_filename': '{project}-{spider}-{job}-{Y}{m}{d}T{H}{M}{S}'
            })
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        now = datetime.datetime.now()
        env = environ.get_environment(msg, slot)
        expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
        # NOTE(review): the second argument of assertTrue is only the failure
        # message, so this verifies that SCRAPY_LOG_FILE is truthy and never
        # compares it with expected_logfilename. Confirm the real output of
        # Environment before turning this into a strict comparison.
        self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider and job are exported with matching paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        # The integer slot is expected back as a string.
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].startswith(
                'file://{}'.format(os.getcwd())))
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
                os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir`` / ``logs_dir`` must not yield feed or log entries."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)

    def test_get_environment_with_logfile(self):
        """SCRAPY_LOG_FILE is produced when a ``logs_filename`` template is set."""
        config = Config(values={
            'items_dir': '',
            'logs_dir': '.',
            'logs_filename': '{project}-{spider}-{Y}{m}{d}T{H}{M}{S}',
        })
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        now = datetime.datetime.now()
        env = environ.get_environment(msg, slot)
        expected_logfilename = now.strftime("mybot-spider-%Y%m%dT%H%M%S")
        # NOTE(review): the second argument of assertTrue is only the failure
        # message, so this verifies that SCRAPY_LOG_FILE is truthy and never
        # compares it with expected_logfilename. Confirm the real output of
        # Environment before turning this into a strict comparison.
        self.assertTrue(env['SCRAPY_LOG_FILE'], expected_logfilename)
def application(config):
    """Assemble and return the Scrapyd ``Application``.

    Job storage, egg storage, launcher and web root classes are each looked
    up by dotted path from *config* (keys ``jobstorage``, ``eggstorage``,
    ``launcher`` and ``webroot``) and loaded with ``load_object``.

    :param config: configuration object providing ``get``, ``getint`` and
        ``getfloat``.
    :return: the populated application object.
    """
    app = Application("Scrapyd")

    port = config.getint('http_port', 6800)
    host = config.get('bind_address', '127.0.0.1')
    interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)
    for interface, component in (
        (IPoller, poller),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
    ):
        app.setComponent(interface, component)

    # Pluggable job storage.
    jobstorage_cls = load_object(
        config.get('jobstorage', 'scrapyd.jobstorage.MemoryJobStorage'))
    app.setComponent(IJobStorage, jobstorage_cls(config))

    # Pluggable egg storage.
    eggstorage_cls = load_object(
        config.get('eggstorage', 'scrapyd.eggstorage.FilesystemEggStorage'))
    app.setComponent(IEggStorage, eggstorage_cls(config))

    # Pluggable launcher.
    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    timer = TimerService(interval, poller.poll)

    # Pluggable web root, wrapped before being served.
    webroot_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))
    resource = create_wrapped_resource(webroot_cls, config, app)
    webservice = TCPServer(port, server.Site(resource), interface=host)

    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=host,
        http_port=port,
    )

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
def application(config):
    """Assemble and return the Scrapyd ``Application`` with a pub/sub service.

    Scheduler, pub/sub and launcher classes are each looked up by dotted
    path from *config* (keys ``scheduler``, ``pubsub`` and ``launcher``) and
    instantiated with ``(config, app)``.

    :param config: configuration object providing ``get``, ``getint`` and
        ``getfloat``.
    :return: the populated application object.
    """
    app = Application("Scrapyd")

    port = config.getint('http_port', 6800)
    host = config.get('bind_address', '0.0.0.0')
    interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)

    scheduler_cls = load_object(
        config.get('scheduler', 'scrapyd.scheduler.SpiderScheduler'))
    scheduler = scheduler_cls(config, app)

    environment = Environment(config)

    pubsub_cls = load_object(config.get('pubsub', 'scrapyd.pubsub.BasePubSub'))
    pubsub = pubsub_cls(config, app)

    for interface, component in (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
        (IPubSub, pubsub),
    ):
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    timer = TimerService(interval, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(port, site, interface=host)

    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=host,
        http_port=port,
    )

    # The pub/sub service is attached first, then the remaining services.
    for service in (pubsub, launcher, timer, webservice):
        service.setServiceParent(app)

    return app
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider and job are exported with matching paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        # The integer slot is expected back as a string.
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # Not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].startswith(
                'file://{}'.format(os.getcwd())))
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
                os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir`` / ``logs_dir`` must not yield feed or log entries."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)
def application(config):
    """Assemble and return the Scrapyd ``Application``.

    When the ``PORT`` environment variable is present it overrides the
    configured ``http_port`` and the service binds to ``0.0.0.0``; otherwise
    the port and ``bind_address`` both come from *config*.

    :param config: configuration object providing ``get``, ``getint`` and
        ``getfloat``.
    :return: the populated application object.
    """
    app = Application("Scrapyd")

    # The configured port is always read, then replaced if PORT is set.
    port = config.getint('http_port', 6800)
    if 'PORT' in os.environ:
        port = int(os.environ.get('PORT'))
        host = '0.0.0.0'
    else:
        host = config.get('bind_address', '127.0.0.1')
    interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)
    for interface, component in (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
    ):
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    webroot_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))

    timer = TimerService(interval, poller.poll)
    site = server.Site(webroot_cls(config, app))
    webservice = TCPServer(port, site, interface=host)

    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=host,
        http_port=port,
    )

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={"eggs_dir": d, "logs_dir": d})
        # Only "newbot" gets an entry in the [settings] section.
        config.cp.add_section("settings")
        config.cp.set("settings", "newbot", "newbot.settings")
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider and job are exported with matching paths."""
        msg = {"_project": "mybot", "_spider": "myspider", "_job": "ID"}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env["SCRAPY_PROJECT"], "mybot")
        # The integer slot is expected back as a string.
        self.assertEqual(env["SCRAPY_SLOT"], "3")
        self.assertEqual(env["SCRAPY_SPIDER"], "myspider")
        self.assertEqual(env["SCRAPY_JOB"], "ID")
        self.assertTrue(env["SCRAPY_LOG_FILE"].endswith(
            os.path.join("mybot", "myspider", "ID.log")))
        self.assertTrue(env["SCRAPY_FEED_URI"].endswith(
            os.path.join("mybot", "myspider", "ID.jl")))
        self.assertNotIn("SCRAPY_SETTINGS_MODULE", env)
class EnvironmentTest(unittest.TestCase):
    """Checks the mapping returned by ``Environment.get_environment``."""

    def setUp(self):
        """Build ``self.environ`` from a config whose dirs share one new path."""
        d = self.mktemp()
        os.mkdir(d)
        config = Config(values={'eggs_dir': d, 'logs_dir': d})
        # Only 'newbot' gets an entry in the [settings] section.
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        self.environ = Environment(config, initenv={})

    def test_interface(self):
        """The environment object must satisfy ``IEnvironment``."""
        verifyObject(IEnvironment, self.environ)

    def test_get_environment_with_eggfile(self):
        """Project, slot, spider and job are exported with matching paths."""
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        env = self.environ.get_environment(msg, slot)
        self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
        # The integer slot is expected back as a string.
        self.assertEqual(env['SCRAPY_SLOT'], '3')
        self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
        self.assertEqual(env['SCRAPY_JOB'], 'ID')
        self.assertTrue(env['SCRAPY_LOG_FILE'].endswith(
            os.path.join('mybot', 'myspider', 'ID.log')))
        if env.get('SCRAPY_FEED_URI'):  # not compulsory
            self.assertTrue(env['SCRAPY_FEED_URI'].endswith(
                os.path.join('mybot', 'myspider', 'ID.jl')))
        self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

    def test_get_environment_with_no_items_dir(self):
        """Empty ``items_dir`` / ``logs_dir`` must not yield feed or log entries."""
        config = Config(values={'items_dir': '', 'logs_dir': ''})
        config.cp.add_section('settings')
        config.cp.set('settings', 'newbot', 'newbot.settings')
        msg = {'_project': 'mybot', '_spider': 'myspider', '_job': 'ID'}
        slot = 3
        environ = Environment(config, initenv={})
        env = environ.get_environment(msg, slot)
        self.assertNotIn('SCRAPY_FEED_URI', env)
        self.assertNotIn('SCRAPY_LOG_FILE', env)