Esempio n. 1
0
    def test_get_spider_list(self):
        """Exercise get_spider_list() across deploys and cache invalidations.

        Versions of the 'mybot' project are added from two fixture eggs
        (mybot.egg with two spiders, mybot2.egg with three) and the spider
        list is checked after each step, with and without invalidating the
        per-project cache.
        """
        project = 'mybot'
        two_spiders = ['spider1', 'spider2']
        three_spiders = ['spider1', 'spider2', 'spider3']

        def listed():
            # Sorted spider names currently reported for the project.
            return sorted(
                get_spider_list(project, pythonpath=get_pythonpath_scrapyd()))

        # mybot.egg ships two spiders: spider1 and spider2.
        self.add_test_version('mybot.egg', project, 'r1')
        self.assertEqual(listed(), two_spiders)

        # mybot2.egg ships three spiders (spider1, spider2, spider3), but the
        # previous result is cached, so effectively it is as if the new
        # version had never been added.
        self.add_test_version('mybot2.egg', project, 'r2')
        self.assertEqual(listed(), two_spiders)

        # Invalidating the project's cache makes the updated list visible.
        UtilsCache.invalid_cache(project)
        self.assertEqual(listed(), three_spiders)

        # Re-deploy mybot.egg as a newer version and clear the cache:
        # only two spiders are seen again.
        self.add_test_version('mybot.egg', project, 'r3')
        UtilsCache.invalid_cache(project)
        self.assertEqual(listed(), two_spiders)

        # Re-deploying the three-spider egg under a version that is not the
        # highest must not change what get_spider_list() returns.
        self.add_test_version('mybot2.egg', project, 'r1a')
        UtilsCache.invalid_cache(project)
        self.assertEqual(listed(), two_spiders)
Esempio n. 2
0
    def test_get_spider_list(self):
        """Verify spider listing for 'mybot' before and after cache resets.

        Two fixture eggs are deployed under several versions; the reported
        spiders must only change once the project's cache is invalidated,
        and must reflect the highest deployed version.
        """
        def expect_spiders(*names):
            # Fetch the current spider list and compare it, sorted, to names.
            found = get_spider_list('mybot',
                                    pythonpath=get_pythonpath_scrapyd())
            self.assertEqual(sorted(found), list(names))

        # Step 1: mybot.egg contains spider1 and spider2.
        self.add_test_version('mybot.egg', 'mybot', 'r1')
        expect_spiders('spider1', 'spider2')

        # Step 2: mybot2.egg adds spider3, yet the cached answer is still
        # served, so effectively the new version looks like it was never
        # added.
        self.add_test_version('mybot2.egg', 'mybot', 'r2')
        expect_spiders('spider1', 'spider2')

        # Step 3: drop the cache for this project and the new list shows up.
        UtilsCache.invalid_cache('mybot')
        expect_spiders('spider1', 'spider2', 'spider3')

        # Step 4: deploy mybot.egg again as r3 and clear the cache; the
        # project is back to two spiders.
        self.add_test_version('mybot.egg', 'mybot', 'r3')
        UtilsCache.invalid_cache('mybot')
        expect_spiders('spider1', 'spider2')

        # Step 5: deploying mybot2.egg under a version that is not the
        # highest does not alter the result of get_spider_list().
        self.add_test_version('mybot2.egg', 'mybot', 'r1a')
        UtilsCache.invalid_cache('mybot')
        expect_spiders('spider1', 'spider2')
Esempio n. 3
0
 def post(self, request, project, version, egg, **kwargs):
     """Store an uploaded egg for a project version and report the outcome.

     The egg payload is wrapped in a file-like object and handed to the egg
     storage; afterwards the project's spiders are listed and the root's
     project list is refreshed.

     Returns a dict with "status", "project", "version" and "spiders"
     (the number of spiders found).
     """
     egg_file = StringIO(egg)
     self.root.eggstorage.put(egg_file, project, version)
     spider_names = get_spider_list(project)
     self.root.update_projects()
     response = {"status": "ok", "project": project, "version": version}
     response["spiders"] = len(spider_names)
     return response
Esempio n. 4
0
 def render_POST(self, txrequest):
     """Schedule a spider run from the request arguments.

     Recognised arguments: repeated 'setting' entries of the form
     KEY=VALUE, the mandatory 'project' and 'spider', and the optional
     '_version', 'priority' and 'jobid'. Every remaining argument is
     forwarded to the scheduler as a keyword argument.

     Returns an error dict when the spider is not in the project's spider
     list, otherwise a dict with the node name, status and job id.
     """
     params = native_stringify_dict(copy(txrequest.args), keys_only=False)
     raw_settings = params.pop('setting', [])
     settings = dict(entry.split('=', 1) for entry in raw_settings)
     # Each argument arrives as a list of values; only the first is used.
     params = {key: values[0] for key, values in params.items()}
     project = params.pop('project')
     spider = params.pop('spider')
     # '_version' is read but left in place, so it is also forwarded below.
     version = params.get('_version', '')
     priority = float(params.pop('priority', 0))
     if spider not in get_spider_list(project, version=version):
         message = "spider '%s' not found" % spider
         return {"status": "error", "message": message}
     params['settings'] = settings
     jobid = params.pop('jobid', uuid.uuid1().hex)
     params['_job'] = jobid
     self.root.scheduler.schedule(project, spider, priority=priority,
                                  **params)
     response = {"node_name": self.root.nodename, "status": "ok"}
     response["jobid"] = jobid
     return response
Esempio n. 5
0
 def render_GET(self, txrequest):
     """Report the spiders and stored versions of every scheduler project.

     Returns a dict with "status" and "projects", where "projects" maps
     each project name to its "spiders" and "versions".
     """
     overview = {
         name: {
             "spiders": get_spider_list(name, runner=self.root.runner),
             "versions": self.root.eggstorage.list(name),
         }
         for name in self.root.scheduler.list_projects()
     }
     return {"status": "ok", "projects": overview}
Esempio n. 6
0
 def render_POST(self, txrequest):
     """Store the egg sent in the request as a new project version.

     Reads the first 'project', 'version' and 'egg' values from the request
     arguments, stores the egg, lists the project's spiders and refreshes
     the root's project list.

     Returns a dict with "status", "project", "version" and "spiders"
     (the number of spiders found).
     """
     fields = txrequest.args
     project = fields['project'][0]
     version = fields['version'][0]
     egg_file = StringIO(fields['egg'][0])
     self.root.eggstorage.put(egg_file, project, version)
     spider_count = len(get_spider_list(project))
     self.root.update_projects()
     return {
         "status": "ok",
         "project": project,
         "version": version,
         "spiders": spider_count,
     }
Esempio n. 7
0
 def projects():
     """Collect the spider list of every project known to the scheduler.

     Each successfully listed project is appended to ``project_spiders`` as
     a dict with 'name' and 'spiders'. When listing a project raises
     RuntimeError, the failure is recorded in ``scrapyd_error`` as
     "<project>: <message>" and the loop moves on to the next project.
     """
     project_spiders = []
     scrapyd_error = None
     for project in self.root.scheduler.list_projects():
         # Skip the stray '.DS_Store' entry. The name is compared by value:
         # an identity test against a string literal is unreliable, and the
         # entry has to be skipped explicitly rather than fall through.
         if project == '.DS_Store':
             continue
         try:
             spiders = get_spider_list(project, runner=self.root.runner)
         except RuntimeError as e:
             # Prefer the exception's 'message' attribute when it exists;
             # otherwise fall back to the exception's string form.
             scrapyd_error = "%s: %s" % (project, getattr(e, 'message', e))
         else:
             project_spiders.append({'name': project, 'spiders': spiders})
Esempio n. 8
0
 def render_GET(self, txrequest):
     """List the spiders of the requested project.

     Uses the first 'project' value from the request arguments and the
     first '_version' value when present (empty string otherwise).

     Returns a dict with "node_name", "status" and "spiders".
     """
     params = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project = params['project'][0]
     version = params.get('_version', [''])[0]
     spider_names = get_spider_list(
         project, runner=self.root.runner, version=version)
     response = {"node_name": self.root.nodename, "status": "ok"}
     response["spiders"] = spider_names
     return response
Esempio n. 9
0
 def render_POST(self, txrequest):
     """Store the uploaded egg as a new version of a project.

     The raw b'egg' value is removed from the request arguments before the
     remaining arguments are converted with native_stringify_dict. After
     the egg is stored, the version's spiders are listed, the root's
     project list is refreshed and the project's cache is invalidated.

     Returns a dict with "node_name", "status", "project", "version" and
     "spiders" (the number of spiders found).
     """
     egg_payload = txrequest.args.pop(b'egg')[0]
     egg_file = BytesIO(egg_payload)
     params = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project, version = params['project'][0], params['version'][0]
     self.root.eggstorage.put(egg_file, project, version)
     spider_names = get_spider_list(project, version=version)
     self.root.update_projects()
     UtilsCache.invalid_cache(project)
     response = {"node_name": self.root.nodename, "status": "ok"}
     response["project"] = project
     response["version"] = version
     response["spiders"] = len(spider_names)
     return response
Esempio n. 10
0
 def test_get_spider_list(self):
     """Check get_spider_list() against the 'mybot.egg' fixture.

     Creates eggs/dbs/logs directories under a temporary path, writes a
     scrapyd.conf pointing at them into that path (made the current
     directory), stores the fixture egg as version 'r1' of 'mybot' and
     expects spider1 and spider2 to be listed.
     """
     base = os.path.abspath(self.mktemp())
     dirs = {}
     for kind in ('eggs', 'dbs', 'logs'):
         dirs[kind] = os.path.join(base, kind)
         os.makedirs(dirs[kind])
     os.chdir(base)
     with open('scrapyd.conf', 'w') as conf:
         conf.writelines([
             "[scrapyd]\n",
             "eggs_dir = %s\n" % dirs['eggs'],
             "dbs_dir = %s\n" % dirs['dbs'],
             "logs_dir = %s\n" % dirs['logs'],
         ])
     # presumably picks up the scrapyd.conf written above -- TODO confirm
     app = get_application()
     storage = app.getComponent(IEggStorage)
     egg = StringIO(get_data(__package__, 'mybot.egg'))
     storage.put(egg, 'mybot', 'r1')
     found = sorted(get_spider_list('mybot'))
     self.assertEqual(found, ['spider1', 'spider2'])
Esempio n. 11
0
 def test_get_spider_list(self):
     """Deploy the 'mybot.egg' fixture and verify its spider list.

     A temporary working directory is prepared with eggs, dbs and logs
     subdirectories plus a scrapyd.conf referencing them; the egg is then
     stored as 'mybot' version 'r1' and the listed spiders are compared
     with ['spider1', 'spider2'].
     """
     workdir = os.path.abspath(self.mktemp())
     eggs_dir = os.path.join(workdir, 'eggs')
     os.makedirs(eggs_dir)
     dbs_dir = os.path.join(workdir, 'dbs')
     os.makedirs(dbs_dir)
     logs_dir = os.path.join(workdir, 'logs')
     os.makedirs(logs_dir)
     os.chdir(workdir)
     with open('scrapyd.conf', 'w') as conf_file:
         conf_file.write("".join([
             "[scrapyd]\n",
             "eggs_dir = %s\n" % eggs_dir,
             "dbs_dir = %s\n" % dbs_dir,
             "logs_dir = %s\n" % logs_dir,
         ]))
     # presumably reads the configuration written above -- TODO confirm
     application = get_application()
     egg_storage = application.getComponent(IEggStorage)
     egg_data = get_data(__package__, 'mybot.egg')
     egg_storage.put(StringIO(egg_data), 'mybot', 'r1')
     spider_names = get_spider_list('mybot')
     self.assertEqual(sorted(spider_names), ['spider1', 'spider2'])
Esempio n. 12
0
 def test_get_spider_list_unicode(self):
     """Check that spiders with non-ASCII names are listed correctly."""
     project = 'mybotunicode'
     # mybotunicode.egg ships two spiders whose names contain non-ASCII
     # characters.
     self.add_test_version('mybotunicode.egg', project, 'r1')
     found = sorted(
         get_spider_list(project, pythonpath=get_pythonpath_scrapyd()))
     self.assertEqual(found, [u'araña1', u'araña2'])
Esempio n. 13
0
 def get_spiders(self, request, pk, **kwargs):
     """Return the spider list of project ``pk``.

     When ``pk`` is not among the scheduler's projects, a 404 plain-text
     "Not found" response is returned instead.
     """
     known_projects = self.root.scheduler.list_projects()
     if pk in known_projects:
         return get_spider_list(pk, runner=self.root.runner)
     plain_text = {"Content-Type": "text/plain"}
     return corepost.Response(404, entity="Not found", headers=plain_text)
Esempio n. 14
0
 def test_get_spider_list_unicode(self):
     """Verify the spider list of the 'mybotunicode.egg' fixture."""
     expected = [u'araña1', u'araña2']
     # The fixture egg contains two spiders with non-ASCII names.
     self.add_test_version('mybotunicode.egg', 'mybotunicode', 'r1')
     names = get_spider_list('mybotunicode',
                             pythonpath=get_pythonpath_scrapyd())
     self.assertEqual(sorted(names), expected)
Esempio n. 15
0
 def render_GET(self, txrequest):
     """List the spiders of the project named in the request.

     Uses the first 'project' value from the request arguments and returns
     a dict with "status" and "spiders".
     """
     project_name = txrequest.args['project'][0]
     response = {"status": "ok"}
     response["spiders"] = get_spider_list(project_name,
                                           runner=self.root.runner)
     return response