Beispiel #1
0
    def __init__(self, task, config=None):
        '''
        Prepare everything a task run needs: the server base URL, a private
        temporary workspace, the log file and an egg storage inside it.

        @type task: SpiderTask
        @param config: agent configuration; a new AgentConfig is created
                       when None is given.
        '''
        self.task = task
        if config is None:
            config = AgentConfig()

        # A configured HTTPS port takes precedence over the plain HTTP port.
        https_port_option = config.get('server_https_port')
        if not https_port_option:
            self.service_base = 'http://%s:%d' % (config.get('server'), config.getint('server_port'))
        else:
            self.service_base = 'https://%s:%d' % (config.get('server'), config.getint('server_https_port'))

        # Runtime state; populated while the task is executing.
        self.p = None
        self.ret_code = None
        self.items_file = None
        self.check_process_callback = None
        self.on_subprocess_start = None
        self.output_file = None
        self._f_output = None

        # Each job gets its own scratch directory named after project and task.
        job_prefix = 'ddjob-%s-%s-' % (task.project_name, task.id)
        self.workspace_dir = tempfile.mkdtemp(prefix=job_prefix)
        if not os.path.exists(self.workspace_dir):
            os.makedirs(self.workspace_dir)

        # Log file kept open for the lifetime of the run.
        log_name = '%s.log' % self.task.id
        self.output_file = str(os.path.join(self.workspace_dir, log_name))
        self._f_output = open(self.output_file, 'w')

        eggs_dir = os.path.join(self.workspace_dir, 'eggs')
        if not os.path.exists(eggs_dir):
            os.mkdir(eggs_dir)
        storage_config = scrapyd.config.Config(values={'eggs_dir': eggs_dir})
        self.egg_storage = FilesystemEggStorage(storage_config)
def get_application(config):
    """Build the Scrapyd application with its components and services.

    Reads ``http_port`` (default 6800), ``bind_address`` (default
    ``'0.0.0.0'``) and ``poll_interval`` (default 5) from ``config``,
    registers the poller, egg storage, scheduler and environment components,
    and attaches the launcher, the poll timer and the web service.

    The launcher class is loaded from the dotted path stored in the
    ``launcher`` option, defaulting to ``scrapyd_mongodb.launcher.Launcher``.

    :param config: scrapyd configuration object
    :return: the configured ``Application``
    """
    app = Application('Scrapyd')
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)

    app.setComponent(IPoller, poller)
    app.setComponent(IEggStorage, eggstorage)
    app.setComponent(ISpiderScheduler, scheduler)
    app.setComponent(IEnvironment, environment)

    laupath = config.get('launcher', 'scrapyd_mongodb.launcher.Launcher')
    laucls = load_object(laupath)
    launcher = laucls(config, app)

    timer = TimerService(poll_interval, poller.poll)
    webservice = TCPServer(http_port,
                           server.Site(Root(config, app)),
                           interface=bind_address)
    # ``format`` must be a plain string: a trailing comma inside the
    # parentheses previously turned it into a 1-tuple, which cannot be
    # %-formatted with the keyword arguments below.
    log.msg(format=('Scrapyd web console available at '
                    'http://%(bind_address)s:%(http_port)s/'),
            bind_address=bind_address,
            http_port=http_port)

    launcher.setServiceParent(app)
    timer.setServiceParent(app)
    webservice.setServiceParent(app)

    return app
Beispiel #3
0
def application(config):
    """Build the Scrapyd application for the Heroku deployment variant.

    The HTTP port is taken from the ``PORT`` environment variable when set,
    otherwise from the ``http_port`` option (default 6800).  The value of the
    ``DATABASE_URL`` environment variable is copied into the ``database_url``
    option of the ``scrapyd`` section before the Psycopg2-backed poller and
    scheduler are created.

    :param config: scrapyd configuration object
    :return: the ``Application`` with launcher, poll timer and web service
             attached
    """
    app = Application("Scrapyd")
    configured_port = config.getint('http_port', 6800)
    http_port = int(environ.get('PORT', configured_port))
    config.cp.set('scrapyd', 'database_url', environ.get('DATABASE_URL'))

    poller = Psycopg2QueuePoller(config)
    components = [
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, Psycopg2SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    ]
    for interface, component in components:
        app.setComponent(interface, component)

    launcher = Launcher(config, app)
    # The queue is polled every 5 seconds.
    timer = TimerService(5, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(http_port, site)
    log.msg("Scrapyd web console available at http://localhost:%s/ (HEROKU)" %
            http_port)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
Beispiel #4
0
 def __init__(self, project_name):
     """Locate the workspace of ``project_name`` and its pip/python binaries.

     The workspace is ``workspace/<project_name>`` resolved to an absolute
     path.  The executables are looked up under ``bin`` on Linux and under
     ``Scripts`` on Windows; any other platform raises NotImplementedError.
     """
     workspace_root = os.path.abspath(os.path.join('workspace', project_name))
     self.project_workspace_dir = workspace_root
     self.project_name = project_name
     self.egg_storage = FilesystemEggStorage(scrapyd.config.Config())

     platform = sys.platform
     if platform.startswith('linux'):
         bin_dir, pip_name, python_name = 'bin', 'pip', 'python'
     elif platform.startswith('win'):
         bin_dir, pip_name, python_name = 'Scripts', 'pip.exe', 'python.exe'
     else:
         raise NotImplementedError('Unsupported system %s' % sys.platform)

     self.pip = os.path.join(workspace_root, bin_dir, pip_name)
     self.python = os.path.join(workspace_root, bin_dir, python_name)
Beispiel #5
0
def project_environment(project):
    """Generator that activates the stored egg of ``project`` around a yield.

    The egg returned by the egg storage (if any) is copied to a temporary
    ``.egg`` file and activated with ``activate_egg``; the temporary file is
    removed once the generator finishes, whether normally or through an
    exception.

    NOTE(review): this is a single-yield generator, presumably wrapped with
    ``contextlib.contextmanager`` outside the visible code -- confirm.

    :param project: project name passed to the egg storage
    :raises AssertionError: if ``scrapy.conf`` has already been imported
    """
    config = Config()
    eggstorage = FilesystemEggStorage(config)
    version, eggfile = eggstorage.get(project)
    eggpath = None
    try:
        # The copy and activation live inside the try block so the temporary
        # egg is removed even when one of them raises.
        if eggfile:
            prefix = '%s-%s-' % (project, version)
            fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
            with os.fdopen(fd, 'wb') as lf:
                shutil.copyfileobj(eggfile, lf)
            activate_egg(eggpath)
        assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded"
        yield
    finally:
        if eggpath:
            os.remove(eggpath)
Beispiel #6
0
def project_environment(project):
    """Generator that runs its single yield with the project's egg activated.

    When the egg storage holds an egg for ``project`` it is copied into a
    temporary ``.egg`` file which is passed to ``activate_egg``.  The
    temporary file is always deleted when the generator is finished.

    NOTE(review): looks like the body of a ``contextlib.contextmanager``;
    the decorator is not visible here -- confirm.

    :param project: project name passed to the egg storage
    :raises AssertionError: if ``scrapy.conf`` has already been imported
    """
    config = Config()
    eggstorage = FilesystemEggStorage(config)
    version, eggfile = eggstorage.get(project)
    eggpath = None
    try:
        if eggfile:
            prefix = '%s-%s-' % (project, version)
            fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
            # ``with`` closes the temp file even if the copy fails, and being
            # inside the try block guarantees the file is removed afterwards.
            with os.fdopen(fd, 'wb') as lf:
                shutil.copyfileobj(eggfile, lf)
            activate_egg(eggpath)
        assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded"
        yield
    finally:
        if eggpath:
            os.remove(eggpath)
Beispiel #7
0
 def find_project_requirements(self, project, egg_storage=None, eggf=None):
     """Return the requirement strings declared by a project's egg.

     :param project: project name; used to look up the egg and to name the
                     temporary file
     :param egg_storage: storage to read the egg from when ``eggf`` is not
                         given; a default FilesystemEggStorage is created
                         when omitted
     :param eggf: already opened, seekable egg file object; when given, the
                  storage is not consulted
     :return: list of requirement strings from the egg's distribution
     :raises ValueError: if no distribution can be found in the egg
     """
     if eggf is None:
         if egg_storage is None:
             egg_storage = FilesystemEggStorage(scrapyd.config.Config())
         version, eggf = egg_storage.get(project)
     # Bound before the try block so the finally clause never touches an
     # unassigned name when mkstemp itself fails.
     eggpath = None
     try:
         prefix = '%s-nover-' % (project)
         fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
         logger.debug('tmp egg file saved to %s' % eggpath)
         with os.fdopen(fd, 'wb') as lf:
             eggf.seek(0)
             shutil.copyfileobj(eggf, lf)
         try:
             # Builtin next() works on both Python 2 and 3 iterators.
             d = next(pkg_resources.find_distributions(eggpath))
         except StopIteration:
             raise ValueError("Unknown or corrupt egg")
         requirements = [str(x) for x in d.requires()]
         return requirements
     finally:
         if eggpath:
             os.remove(eggpath)
Beispiel #8
0
    def test_egg(self, eggf):
        """Check an egg by installing its requirements and listing its spiders.

        The egg is stored as version '1' in an eggs directory under a new
        temporary directory (remembered in ``self.temp_dir``), its
        requirements plus 'scrapyd' are installed with ``pip_install`` and
        finally ``spider_list`` is run from the temporary directory.

        :param eggf: seekable egg file object
        :return: Future resolved with the spider list, or carrying the
                 exception of whichever step failed
        """
        result_future = Future()
        workdir = tempfile.mkdtemp('scrapydd-egg-%s' % self.project_name)
        self.temp_dir = workdir
        eggf.seek(0)
        storage_config = scrapyd.config.Config(
            {'eggs_dir': os.path.join(workdir, 'eggs')})
        storage = FilesystemEggStorage(storage_config)
        storage.put(eggf, project=self.project_name, version='1')
        eggf.seek(0)

        requirements = self._read_egg_requirements(eggf) + ['scrapyd']

        def forward_failure(done_future):
            # Propagate a failed step to the caller; True means "stop here".
            error = done_future.exception()
            if error is None:
                return False
            result_future.set_exception(error)
            return True

        def on_spiders_listed(done_future):
            logger.debug('after_spider_list')
            if not forward_failure(done_future):
                result_future.set_result(done_future.result())

        def on_requirements_installed(done_future):
            logger.debug('after_pip_install')
            if forward_failure(done_future):
                return
            listing = self.spider_list(self.project_name, cwd=workdir)
            listing.add_done_callback(on_spiders_listed)

        installing = self.pip_install(requirements)
        installing.add_done_callback(on_requirements_installed)

        return result_future
Beispiel #9
0
def application(config):
    """Assemble the Scrapyd application with pluggable scheduler, pubsub and launcher.

    ``http_port`` (default 6800), ``bind_address`` (default ``'0.0.0.0'``)
    and ``poll_interval`` (default 5) are read from ``config``.  The
    scheduler, pubsub and launcher classes are loaded from the dotted paths
    stored in the ``scheduler``, ``pubsub`` and ``launcher`` options and are
    instantiated with ``(config, app)``.

    :param config: scrapyd configuration object
    :return: the ``Application`` with all components and services attached
    """
    app = Application("Scrapyd")
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    def instantiate(option, default_path):
        # Load the class configured under ``option`` and build it for this app.
        component_cls = load_object(config.get(option, default_path))
        return component_cls(config, app)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = instantiate('scheduler', 'scrapyd.scheduler.SpiderScheduler')
    environment = Environment(config)
    pubsub = instantiate('pubsub', 'scrapyd.pubsub.BasePubSub')

    registrations = (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
        (IPubSub, pubsub),
    )
    for interface, component in registrations:
        app.setComponent(interface, component)

    launcher = instantiate('launcher', 'scrapyd.launcher.Launcher')

    timer = TimerService(poll_interval, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=bind_address,
        http_port=http_port)

    for service in (pubsub, launcher, timer, webservice):
        service.setServiceParent(app)
    return app
Beispiel #10
0
def application(config):
    """Create the Scrapyd application, honouring a ``PORT`` environment override.

    When ``PORT`` is present in the environment it replaces the configured
    ``http_port`` (default 6800) and the server binds to ``'0.0.0.0'``;
    otherwise the ``bind_address`` option (default ``'127.0.0.1'``) is used.
    The launcher and web root classes are loaded from the ``launcher`` and
    ``webroot`` options.

    :param config: scrapyd configuration object
    :return: the ``Application`` with launcher, poll timer and web service
             attached
    """
    app = Application("Scrapyd")

    http_port = config.getint('http_port', 6800)
    if 'PORT' in os.environ:
        http_port = int(os.environ.get('PORT'))
        bind_address = '0.0.0.0'
    else:
        bind_address = config.get('bind_address', '127.0.0.1')

    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    components = [
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    ]
    for interface, component in components:
        app.setComponent(interface, component)

    launcher_cls = load_object(config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    webroot_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))

    timer = TimerService(poll_interval, poller.poll)
    site = server.Site(webroot_cls(config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
            bind_address=bind_address, http_port=http_port)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
Beispiel #11
0
class EggStorageTest(unittest.TestCase):
    """Tests FilesystemEggStorage against a temporary eggs directory."""

    def setUp(self):
        eggs_dir = self.mktemp()
        self.eggst = FilesystemEggStorage(Config(values={'eggs_dir': eggs_dir}))

    def test_interface(self):
        verifyObject(IEggStorage, self.eggst)

    def _assert_egg(self, expected_version, expected_content, *get_args):
        # Fetch an egg, compare version and content, then release the handle.
        version, eggfile = self.eggst.get(*get_args)
        self.assertEqual(version, expected_version)
        self.assertEqual(eggfile.read(), expected_content)
        eggfile.close()

    def test_put_get_list_delete(self):
        uploads = (
            (b"egg01", '01'),
            (b"egg03", '03/ver'),
            (b"egg02", '02_my branch'),
        )
        for content, version in uploads:
            self.eggst.put(BytesIO(content), 'mybot', version)

        # Listed versions have '/' and ' ' replaced by '_'.
        self.assertEqual(self.eggst.list('mybot'),
                         ['01', '02_my_branch', '03_ver'])
        self.assertEqual(self.eggst.list('mybot2'), [])

        # Without a version the last listed one is returned.
        self._assert_egg("03_ver", b"egg03", 'mybot')
        # The version is echoed back exactly as it was requested.
        self._assert_egg("02_my branch", b"egg02", 'mybot', '02_my branch')
        self._assert_egg("02_my_branch", b"egg02", 'mybot', '02_my_branch')

        self.eggst.delete('mybot', '02_my branch')
        self.assertEqual(self.eggst.list('mybot'), ['01', '03_ver'])

        self.eggst.delete('mybot', '03_ver')
        self.assertEqual(self.eggst.list('mybot'), ['01'])

        self.eggst.delete('mybot')
        self.assertEqual(self.eggst.list('mybot'), [])
Beispiel #12
0
class EggStorageTest(unittest.TestCase):
    """Round-trip tests for FilesystemEggStorage using text eggs."""

    def setUp(self):
        eggs_dir = self.mktemp()
        storage_config = Config(values={'eggs_dir': eggs_dir})
        self.eggst = FilesystemEggStorage(storage_config)

    def test_interface(self):
        verifyObject(IEggStorage, self.eggst)

    def test_put_get_list_delete(self):
        storage = self.eggst
        storage.put(StringIO("egg01"), 'mybot', '01')
        storage.put(StringIO("egg03"), 'mybot', '03/ver')
        storage.put(StringIO("egg02"), 'mybot', '02_my branch')

        expected_versions = ['01', '02_my_branch', '03_ver']
        self.assertEqual(storage.list('mybot'), expected_versions)
        self.assertEqual(storage.list('mybot2'), [])

        # (arguments for get, expected version, expected content)
        lookups = [
            (('mybot',), "03_ver", "egg03"),
            (('mybot', '02_my branch'), "02_my branch", "egg02"),
            (('mybot', '02_my_branch'), "02_my_branch", "egg02"),
        ]
        for get_args, want_version, want_content in lookups:
            got_version, eggfile = storage.get(*get_args)
            self.assertEqual(got_version, want_version)
            self.assertEqual(eggfile.read(), want_content)
            eggfile.close()

        # Delete one version at a time, then the whole project.
        storage.delete('mybot', '02_my branch')
        self.assertEqual(storage.list('mybot'), ['01', '03_ver'])

        storage.delete('mybot', '03_ver')
        self.assertEqual(storage.list('mybot'), ['01'])

        storage.delete('mybot')
        self.assertEqual(storage.list('mybot'), [])
Beispiel #13
0
 def setUp(self):
     """Create an egg storage whose eggs directory is a fresh temp path."""
     eggs_dir = self.mktemp()
     self.eggst = FilesystemEggStorage(Config(values={'eggs_dir': eggs_dir}))
Beispiel #14
0
import os
from scrapyd.eggstorage import FilesystemEggStorage
from scrapyd.config import Config
import urllib2
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
register_openers()

source_dir = '/kf/scrapyd'

dest_url = 'http://localhost:6801/addversion.json'

source_eggs_dir = os.path.join(source_dir, 'eggs')
source_config = Config({'eggs_dir': source_eggs_dir})
source_egg_storage = FilesystemEggStorage(source_config)
for dir in os.listdir(source_eggs_dir):
    #print dir
    project = dir
    version, egg = source_egg_storage.get(project)
    print project, version
    post_data = {
        'egg': egg,
        'project': project,
        'version': version,
    }
    datagen, headers = multipart_encode(post_data)
    request = urllib2.Request(url=dest_url, headers=headers, data=datagen)
    try:
        res = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        print 'HTTPError: %s' % e
Beispiel #15
0
class TaskExecutor():
    """Runs a single spider task inside its own temporary workspace.

    The executor downloads the project egg from the server, installs the
    egg's requirements into the project workspace, launches
    ``scrapyd.runner crawl`` as a subprocess and records the exit code in
    ``ret_code``.  Subprocess output goes to ``output_file`` and the feed
    URI points at ``items_file``.
    """

    def __init__(self, task, config=None):
        '''
        Create the temporary workspace, the log file and the egg storage.

        @type task: SpiderTask
        @param config: agent configuration; AgentConfig() is used when None.
        '''
        self.task = task
        if config is None:
            config = AgentConfig()
        # A configured HTTPS port takes precedence over the plain HTTP port.
        if config.get('server_https_port'):
            self.service_base = 'https://%s:%d' % (config.get('server'), config.getint('server_https_port'))
        else:
            self.service_base = 'http://%s:%d' % (config.get('server'), config.getint('server_port'))
        self._f_output = None
        self.output_file = None
        self.p = None
        self.check_process_callback = None
        self.items_file = None
        self.ret_code = None
        self.workspace_dir = tempfile.mkdtemp(prefix='ddjob-%s-%s-' % (task.project_name, task.id))
        if not os.path.exists(self.workspace_dir):
            os.makedirs(self.workspace_dir)
        self.output_file = str(os.path.join(self.workspace_dir, '%s.log' % self.task.id))
        self._f_output = open(self.output_file, 'w')

        eggs_dir = os.path.join(self.workspace_dir, 'eggs')
        if not os.path.exists(eggs_dir):
            os.mkdir(eggs_dir)
        self.egg_storage = FilesystemEggStorage(scrapyd.config.Config(values={'eggs_dir': eggs_dir}))
        # Optional hook called as on_subprocess_start(task, pid).
        self.on_subprocess_start = None

    @gen.coroutine
    def execute(self):
        """Download the egg, install its requirements and run the crawl.

        Any failure is written to the task log through
        ``complete_with_error`` instead of being raised.

        :return: (via ``gen.Return``) this executor, with ``ret_code`` set
        """
        try:
            workspace = ProjectWorkspace(self.task.project_name)
            yield workspace.init()
            logger.debug('begin download egg.')
            egg_request_url = urlparse.urljoin(self.service_base, '/spiders/%d/egg' % self.task.spider_id)
            request = HTTPRequest(egg_request_url)
            client = AsyncHTTPClient()
            response = yield client.fetch(request)
            self.egg_storage.put(response.buffer, self.task.project_name, self.task.project_version)
            logger.debug('download egg done.')
            requirements = workspace.find_project_requirements(self.task.project_name, egg_storage=self.egg_storage)
            yield workspace.pip_install(requirements)
            result = yield self.execute_subprocess()
        except ProcessFailed as e:
            logger.error(e)
            error_log = e.message
            if e.std_output:
                logger.error(e.std_output)
                error_log += e.std_output
            result = self.complete_with_error(error_log)
        except Exception as e:
            logger.error(e)
            # Fall back to str(e) when the exception has no (or an empty)
            # ``message`` attribute, so the log always says what went wrong.
            error_log = getattr(e, 'message', None) or str(e)
            result = self.complete_with_error(error_log)
        raise gen.Return(result)

    def check_process(self):
        """Poll the subprocess and finish the task once it has exited."""
        execute_result = self.p.poll()
        logger.debug('check process')
        if execute_result is not None:
            logger.info('task complete')
            self.complete(execute_result)

    def execute_subprocess(self):
        """Start the crawl subprocess and watch it until it exits.

        :return: Future resolved with this executor once the subprocess has
                 exited, or immediately when the subprocess cannot be started
        """
        future = Future()
        # init items file
        workspace = ProjectWorkspace(self.task.project_name)
        self.items_file = os.path.join(self.workspace_dir, '%s.%s' % (self.task.id, 'jl'))
        python = workspace.python
        runner = 'scrapyd.runner'
        pargs = [python, '-m', runner, 'crawl', self.task.spider_name]
        for spider_parameter_key, spider_parameter_value in self.task.spider_parameters.items():
            pargs += [
                        '-s',
                        '%s=%s' % (spider_parameter_key, spider_parameter_value)
                      ]

        env = os.environ.copy()
        env['SCRAPY_PROJECT'] = str(self.task.project_name)
        env['SCRAPY_JOB'] = str(self.task.id)
        env['SCRAPY_FEED_URI'] = str(path_to_file_uri(self.items_file))
        try:
            self.p = subprocess.Popen(pargs, env=env, stdout=self._f_output, cwd=self.workspace_dir, stderr=self._f_output)
            if self.on_subprocess_start:
                self.on_subprocess_start(self.task, self.p.pid)

        except Exception as e:
            # Callers yield the return value, so the failure has to be
            # delivered through the Future rather than as a bare object.
            future.set_result(self.complete_with_error('Error when starting crawl subprocess : %s' % e))
            return future
        logger.info('job %s started on pid: %d' % (self.task.id, self.p.pid))

        def check_process():
            execute_result = self.p.poll()
            logger.debug('check process')
            if execute_result is not None:
                logger.info('task complete')
                future.set_result(self.complete(execute_result))

        # Poll the subprocess once per second until it exits.
        self.check_process_callback = PeriodicCallback(check_process, 1*1000)
        self.check_process_callback.start()
        return future

    def result(self):
        """Return the object handed back to callers: the executor itself."""
        return self

    def complete(self, ret_code):
        """Record the exit code, close the log and stop the periodic poll."""
        self._f_output.close()
        self.ret_code = ret_code
        self.check_process_callback.stop()
        self.check_process_callback = None
        return self.result()

    def complete_with_error(self, error_message):
        """Log ``error_message``, append it to the task log and set ret_code 1."""
        logger.error(error_message)
        # The log may already have been closed by an earlier completion;
        # writing to a closed file would raise and hide the real error.
        if not self._f_output.closed:
            self._f_output.write(error_message)
            self._f_output.close()
        self.ret_code = 1
        return self.result()

    def __del__(self):
        # Remove the temporary workspace together with the executor.
        logger.debug('delete task executor for task %s' % self.task.id)
        if self.workspace_dir and os.path.exists(self.workspace_dir):
            shutil.rmtree(self.workspace_dir)

    @gen.coroutine
    def kill(self):
        """Terminate the subprocess, killing it if it is still alive after 10s."""
        logger.info('killing job %s' % self.task.id)
        if not self.p:
            return
        self.p.terminate()

        # The sleep has to be yielded; otherwise the coroutine does not wait
        # and the process is killed right after terminate().
        yield gen.sleep(10)
        if self.p.poll() is None:
            self.p.kill()
Beispiel #16
0
class ProjectWorkspace(object):
    """Per-project virtualenv workspace plus access to the project's eggs.

    The workspace lives in ``workspace/<project_name>`` (resolved to an
    absolute path) and provides helpers to create the virtualenv, install
    requirements with its pip, list spiders with its python and manage eggs
    through a FilesystemEggStorage.
    """
    pip = None
    python = None
    process = None
    project_workspace_dir = None
    project_check = None
    temp_dir = None

    def __init__(self, project_name):
        """Compute the workspace paths for ``project_name``.

        :raises NotImplementedError: on platforms other than Linux and Windows
        """
        project_workspace_dir = os.path.abspath(
            os.path.join('workspace', project_name))
        self.project_workspace_dir = project_workspace_dir
        self.project_name = project_name
        self.egg_storage = FilesystemEggStorage(scrapyd.config.Config())
        if sys.platform.startswith('linux'):
            self.pip = os.path.join(project_workspace_dir, 'bin', 'pip')
            self.python = os.path.join(project_workspace_dir, 'bin', 'python')
        elif sys.platform.startswith('win'):
            self.pip = os.path.join(project_workspace_dir, 'Scripts',
                                    'pip.exe')
            self.python = os.path.join(project_workspace_dir, 'Scripts',
                                       'python.exe')
        else:
            raise NotImplementedError('Unsupported system %s' % sys.platform)

    def init(self):
        '''
        Create the project's isolated virtualenv unless pip and python
        already exist in it.

        :return: Future resolved with this workspace, or carrying the
                 exception raised while starting or running virtualenv
        '''
        future = Future()
        if os.path.exists(self.pip) and os.path.exists(self.python):
            future.set_result(self)
            return future

        logger.debug('start creating virtualenv.')
        try:
            # NOTE(review): stdout/stderr are pipes that are only read after
            # the process has exited; a child writing more than the pipe
            # buffer holds could stall -- worth confirming.
            process = Popen([
                'virtualenv', '--system-site-packages',
                self.project_workspace_dir
            ],
                            stdout=PIPE,
                            stderr=PIPE)
        except Exception as e:
            future.set_exception(e)
            return future

        def check_process():
            logger.debug('create virtualenv process poll.')
            retcode = process.poll()
            if retcode is not None:
                if retcode == 0:
                    future.set_result(self)
                else:
                    std_output = process.stdout.read()
                    err_output = process.stderr.read()
                    future.set_exception(
                        ProcessFailed('Error when init workspace virtualenv ',
                                      std_output=std_output,
                                      err_output=err_output))
                return
            # Still running: look again in one second.
            IOLoop.current().call_later(1, check_process)

        check_process()
        return future

    def find_project_requirements(self, project, egg_storage=None, eggf=None):
        """Return the requirement strings declared by a project's egg.

        :param project: project name; used to look up the egg and to name
                        the temporary file
        :param egg_storage: storage to read the egg from when ``eggf`` is not
                            given; a default FilesystemEggStorage is created
                            when omitted
        :param eggf: already opened, seekable egg file object
        :return: list of requirement strings
        :raises ValueError: if no distribution can be found in the egg
        """
        if eggf is None:
            if egg_storage is None:
                egg_storage = FilesystemEggStorage(scrapyd.config.Config())
            version, eggf = egg_storage.get(project)
        return self._requirements_from_egg(eggf, '%s-nover-' % (project))

    def test_egg(self, eggf):
        """Check an egg by installing its requirements and listing its spiders.

        The egg is stored as version '1' in an eggs directory under a new
        temporary directory, which is remembered in ``self.temp_dir`` so
        ``clearup`` can remove it later.

        :param eggf: seekable egg file object
        :return: Future resolved with the spider list, or carrying the
                 exception of whichever step failed
        """
        future = Future()
        # NOTE(review): the first positional argument of mkdtemp is the
        # suffix, so this text ends the directory name -- confirm that a
        # prefix was not intended.
        temp_dir = tempfile.mkdtemp('scrapydd-egg-%s' % self.project_name)
        self.temp_dir = temp_dir
        eggf.seek(0)
        egg_storage = FilesystemEggStorage(
            scrapyd.config.Config({'eggs_dir': os.path.join(temp_dir,
                                                            'eggs')}))
        egg_storage.put(eggf, project=self.project_name, version='1')
        eggf.seek(0)

        requirements = self._read_egg_requirements(eggf) + ['scrapyd']

        def after_spider_list(callback_future):
            logger.debug('after_spider_list')
            exc = callback_future.exception()
            if exc is not None:
                future.set_exception(exc)
                return
            spider_list = callback_future.result()
            future.set_result(spider_list)

        def after_pip_install(callback_future):
            logger.debug('after_pip_install')
            exc = callback_future.exception()
            if exc is not None:
                future.set_exception(exc)
                return

            self.spider_list(self.project_name,
                             cwd=temp_dir).add_done_callback(after_spider_list)

        self.pip_install(requirements).add_done_callback(after_pip_install)

        return future

    def _read_egg_requirements(self, eggf):
        """Return the requirement strings declared by the egg ``eggf``.

        :raises ValueError: if no distribution can be found in the egg
        """
        return self._requirements_from_egg(
            eggf, '%s-%s-' % (self.project_name, 0))

    def _requirements_from_egg(self, eggf, prefix):
        """Copy ``eggf`` to a temp ``.egg`` file and read its requirements.

        :param eggf: seekable egg file object
        :param prefix: prefix for the temporary file name
        :return: list of requirement strings
        :raises ValueError: if no distribution can be found in the egg
        """
        # Bound before the try block so the finally clause never touches an
        # unassigned name when mkstemp itself fails.
        eggpath = None
        try:
            fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
            logger.debug('tmp egg file saved to %s' % eggpath)
            with os.fdopen(fd, 'wb') as lf:
                eggf.seek(0)
                shutil.copyfileobj(eggf, lf)
            try:
                # Builtin next() works on both Python 2 and 3 iterators.
                d = next(pkg_resources.find_distributions(eggpath))
            except StopIteration:
                raise ValueError("Unknown or corrupt egg")
            return [str(x) for x in d.requires()]
        finally:
            if eggpath:
                os.remove(eggpath)

    def pip_install(self, requirements):
        """Install ``requirements`` with the workspace's pip.

        :param requirements: list of requirement strings
        :return: Future resolved with this workspace on exit code 0, or
                 carrying ProcessFailed with the captured output otherwise
        """
        logger.debug('installing requirements: %s' % requirements)
        future = Future()
        try:
            # NOTE(review): output is only read after exit; large pip output
            # could fill the pipe and stall the child -- worth confirming.
            process = Popen([self.pip, 'install'] + requirements,
                            stdout=PIPE,
                            stderr=PIPE)
        except Exception as e:
            future.set_exception(e)
            return future

        def check_process():
            logger.debug('poll')
            retcode = process.poll()
            if retcode is not None:
                if retcode == 0:
                    future.set_result(self)
                else:
                    std_out = process.stdout.read()
                    err_out = process.stderr.read()
                    future.set_exception(
                        ProcessFailed(std_output=std_out, err_output=err_out))
                return
            # Still running: look again in one second.
            IOLoop.current().call_later(1, check_process)

        check_process()
        return future

    def spider_list(self, project, cwd=None):
        """List the spiders of ``project`` by running ``scrapyd.runner list``.

        :param project: value exported as ``SCRAPY_PROJECT`` to the runner
        :param cwd: working directory for the runner process
        :return: Future resolved with the runner's stdout lines on exit code
                 0, or carrying InvalidProjectEgg with its stderr otherwise
        """
        future = Future()
        try:
            env = os.environ.copy()
            env['SCRAPY_PROJECT'] = project
            process = Popen([self.python, '-m', 'scrapyd.runner', 'list'],
                            env=env,
                            cwd=cwd,
                            stdout=PIPE,
                            stderr=PIPE)
        except Exception as e:
            logger.error(e)
            future.set_exception(e)
            return future

        def check_process():
            logger.debug('poll')
            retcode = process.poll()
            if retcode is not None:
                if retcode == 0:
                    future.set_result(process.stdout.read().splitlines())
                else:
                    future.set_exception(
                        InvalidProjectEgg(detail=process.stderr.read()))
                return
            # Still running: look again in one second.
            IOLoop.current().call_later(1, check_process)

        check_process()
        return future

    def clearup(self):
        '''
        Remove the temporary directory created by ``test_egg``, if any.
        :return:
        '''
        if self.temp_dir and os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)

    def put_egg(self, eggfile, version):
        """Store ``eggfile`` (rewound first) as ``version`` of this project."""
        eggfile.seek(0)
        self.egg_storage.put(eggfile=eggfile,
                             project=self.project_name,
                             version=version)

    def get_egg(self, version=None):
        """Return what the egg storage's ``get`` yields for this project."""
        return self.egg_storage.get(self.project_name, version=version)

    def delete_egg(self, project, version=None):
        """Delete the eggs of ``project``, passing ``version`` to the storage."""
        logger.info('deleting project eggs')
        return self.egg_storage.delete(project, version)

    def list_versions(self, project):
        """Return the versions the egg storage lists for ``project``."""
        return self.egg_storage.list(project)
class EggStorageTest(unittest.TestCase):
    """Put/get/list/delete round trip for FilesystemEggStorage."""

    def setUp(self):
        eggs_dir = self.mktemp()
        self.eggst = FilesystemEggStorage(Config(values={"eggs_dir": eggs_dir}))

    def test_interface(self):
        verifyObject(IEggStorage, self.eggst)

    def test_put_get_list_delete(self):
        storage = self.eggst

        def check_get(want_version, want_content, *get_args):
            # Fetch an egg, compare version and content, release the handle.
            got_version, eggfile = storage.get(*get_args)
            self.assertEqual(got_version, want_version)
            self.assertEqual(eggfile.read(), want_content)
            eggfile.close()

        # Stored out of order on purpose; list() is expected to be sorted.
        for content, version in (("egg01", "01"), ("egg03", "03"), ("egg02", "02")):
            storage.put(StringIO(content), "mybot", version)

        self.assertEqual(storage.list("mybot"), ["01", "02", "03"])
        self.assertEqual(storage.list("mybot2"), [])

        check_get("03", "egg03", "mybot")
        check_get("02", "egg02", "mybot", "02")

        storage.delete("mybot", "02")
        self.assertEqual(storage.list("mybot"), ["01", "03"])

        storage.delete("mybot")
        self.assertEqual(storage.list("mybot"), [])
Beispiel #18
0
 def setUp(self):
     """Build the egg storage under test on a fresh temporary path."""
     storage_config = Config(values={'eggs_dir': self.mktemp()})
     self.eggst = FilesystemEggStorage(storage_config)