Ejemplo n.º 1
0
def get_application(config):
    """Build and return the 'Scrapyd' application with all of its services.

    ``config`` must expose ``get``, ``getint`` and ``getfloat``; the options
    read here are ``http_port`` (default 6800), ``bind_address`` (default
    '0.0.0.0'), ``poll_interval`` (default 5) and ``launcher`` (default
    'scrapyd_mongodb.launcher.Launcher').

    The poller, egg storage, scheduler and environment are registered as
    components on the application; the launcher, the poll timer and the web
    service are attached to it as child services.

    Returns the configured application object.
    """
    app = Application('Scrapyd')
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)

    app.setComponent(IPoller, poller)
    app.setComponent(IEggStorage, eggstorage)
    app.setComponent(ISpiderScheduler, scheduler)
    app.setComponent(IEnvironment, environment)

    # The launcher class is configurable; it is resolved with load_object()
    # and instantiated with the config and the application.
    laupath = config.get('launcher', 'scrapyd_mongodb.launcher.Launcher')
    laucls = load_object(laupath)
    launcher = laucls(config, app)

    timer = TimerService(poll_interval, poller.poll)
    webservice = TCPServer(http_port,
                           server.Site(Root(config, app)),
                           interface=bind_address)
    # ``format`` must be a plain string: a stray trailing comma inside the
    # parentheses would turn it into a 1-tuple, which cannot be %-formatted
    # with the keyword values below.
    log.msg(format=('Scrapyd web console available at '
                    'http://%(bind_address)s:%(http_port)s/'),
            bind_address=bind_address,
            http_port=http_port)

    launcher.setServiceParent(app)
    timer.setServiceParent(app)
    webservice.setServiceParent(app)

    return app
Ejemplo n.º 2
0
def application(config):
    """Assemble the 'Scrapyd' application for a Heroku-style deployment.

    The HTTP port comes from the ``PORT`` environment variable when set,
    otherwise from the ``http_port`` config option (default 6800). The value
    of the ``DATABASE_URL`` environment variable is written into the
    ``scrapyd`` section of the underlying config parser as ``database_url``
    before any component is built.

    Returns the application with its components registered and the launcher,
    poll timer and web service attached as child services.
    """
    app = Application("Scrapyd")

    configured_port = config.getint('http_port', 6800)
    http_port = int(environ.get('PORT', configured_port))
    database_url = environ.get('DATABASE_URL')
    config.cp.set('scrapyd', 'database_url', database_url)

    # Build every component first, then register them in one pass.
    poller = Psycopg2QueuePoller(config)
    components = [
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, Psycopg2SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    ]
    for interface, component in components:
        app.setComponent(interface, component)

    launcher = Launcher(config, app)
    timer = TimerService(5, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(http_port, site)
    log.msg("Scrapyd web console available at http://localhost:%s/ (HEROKU)" %
            http_port)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
Ejemplo n.º 3
0
    def __init__(self, task, config=None):
        """Set up a fresh workspace directory, log file and egg storage for ``task``.

        ``config`` defaults to a new ``AgentConfig``. The service base URL is
        https when the ``server_https_port`` option is set, otherwise http
        using ``server_port``.

        @type task: SpiderTask
        """
        self.task = task
        if config is None:
            config = AgentConfig()

        # Pick the URL template and the option that holds the matching port.
        if config.get('server_https_port'):
            base_template, port_option = 'https://%s:%d', 'server_https_port'
        else:
            base_template, port_option = 'http://%s:%d', 'server_port'
        self.service_base = base_template % (config.get('server'), config.getint(port_option))

        # Attributes that start out unset.
        self._f_output = None
        self.output_file = None
        self.p = None
        self.check_process_callback = None
        self.items_file = None
        self.ret_code = None

        workspace_prefix = 'ddjob-%s-%s-' % (task.project_name, task.id)
        self.workspace_dir = tempfile.mkdtemp(prefix=workspace_prefix)
        if not os.path.exists(self.workspace_dir):
            os.makedirs(self.workspace_dir)

        # The task's log file lives inside the workspace and is opened now.
        log_name = '%s.log' % self.task.id
        self.output_file = str(os.path.join(self.workspace_dir, log_name))
        self._f_output = open(self.output_file, 'w')

        # Egg storage is rooted in an 'eggs' subdirectory of the workspace.
        workspace_eggs = os.path.join(self.workspace_dir, 'eggs')
        if not os.path.exists(workspace_eggs):
            os.mkdir(workspace_eggs)
        storage_config = scrapyd.config.Config(values={'eggs_dir': workspace_eggs})
        self.egg_storage = FilesystemEggStorage(storage_config)
        self.on_subprocess_start = None
Ejemplo n.º 4
0
 def __init__(self, project_name):
     """Record the project's workspace paths and platform-specific executables.

     The workspace is the absolute path of ``workspace/<project_name>``
     relative to the current working directory. ``pip`` and ``python`` point
     into ``bin`` on Linux and into ``Scripts`` (with ``.exe`` names) on
     Windows.

     Raises NotImplementedError when ``sys.platform`` starts with neither
     'linux' nor 'win'.
     """
     workspace = os.path.abspath(os.path.join('workspace', project_name))
     self.project_workspace_dir = workspace
     self.project_name = project_name
     self.egg_storage = FilesystemEggStorage(scrapyd.config.Config())

     # Choose the executables directory and file names for this platform.
     platform = sys.platform
     if platform.startswith('linux'):
         scripts_dir, pip_name, python_name = 'bin', 'pip', 'python'
     elif platform.startswith('win'):
         scripts_dir, pip_name, python_name = 'Scripts', 'pip.exe', 'python.exe'
     else:
         raise NotImplementedError('Unsupported system %s' % sys.platform)

     self.pip = os.path.join(workspace, scripts_dir, pip_name)
     self.python = os.path.join(workspace, scripts_dir, python_name)
Ejemplo n.º 5
0
def application(config):
    """Assemble the 'Scrapyd' application with pluggable scheduler, pubsub and launcher.

    Options read from ``config``: ``http_port`` (default 6800),
    ``bind_address`` (default '0.0.0.0'), ``poll_interval`` (default 5), and
    the ``scheduler``, ``pubsub`` and ``launcher`` options, each resolved
    with ``load_object`` and instantiated with ``(config, app)``.

    Returns the application with five components registered and the pubsub,
    launcher, poll timer and web service attached as child services.
    """
    app = Application("Scrapyd")
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)

    scheduler_cls = load_object(
        config.get('scheduler', 'scrapyd.scheduler.SpiderScheduler'))
    scheduler = scheduler_cls(config, app)

    environment = Environment(config)

    pubsub_cls = load_object(config.get('pubsub', 'scrapyd.pubsub.BasePubSub'))
    pubsub = pubsub_cls(config, app)

    # Register all components only after every one of them has been built.
    registrations = (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
        (IPubSub, pubsub),
    )
    for interface, component in registrations:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    timer = TimerService(poll_interval, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=bind_address,
        http_port=http_port)

    for service in (pubsub, launcher, timer, webservice):
        service.setServiceParent(app)
    return app
Ejemplo n.º 6
0
def project_environment(project):
    """Generator that activates ``project``'s egg around a single ``yield``.

    The egg file obtained from a default-configured ``FilesystemEggStorage``
    is copied to a temporary ``.egg`` file, which is passed to
    ``activate_egg``. When the storage returns no egg file, nothing is copied
    or activated. The temporary file is removed when the generator finishes,
    including when the copy, the activation or the body running at the
    ``yield`` raises.

    NOTE(review): presumably wrapped by a context-manager decorator defined
    above this function; confirm at the definition site.
    """
    config = Config()
    eggstorage = FilesystemEggStorage(config)
    version, eggfile = eggstorage.get(project)
    eggpath = None
    try:
        if eggfile:
            prefix = '%s-%s-' % (project, version)
            fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
            # Close the handle even if the copy fails, so the cleanup below
            # can always remove the file.
            with os.fdopen(fd, 'wb') as lf:
                shutil.copyfileobj(eggfile, lf)
            activate_egg(eggpath)
        # Kept as an assert to preserve the AssertionError callers may see;
        # note that it is skipped when Python runs with -O.
        assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded"
        yield
    finally:
        if eggpath:
            os.remove(eggpath)
Ejemplo n.º 7
0
 def find_project_requirements(self, project, egg_storage=None, eggf=None):
     """Return the requirement strings declared by a project's egg.

     :param project: project name; used to look the egg up in storage and as
         part of the temporary file name.
     :param egg_storage: storage to fetch the egg from when ``eggf`` is not
         given; defaults to a ``FilesystemEggStorage`` with default config.
     :param eggf: seekable file object holding the egg; when omitted it is
         fetched with ``egg_storage.get(project)``.
     :return: list of ``str(requirement)`` for each entry of the
         distribution's ``requires()``.
     :raises ValueError: when no distribution can be found in the egg.

     The egg is copied to a temporary file, which is always removed before
     returning or raising.
     """
     if eggf is None:
         if egg_storage is None:
             egg_storage = FilesystemEggStorage(scrapyd.config.Config())
         _version, eggf = egg_storage.get(project)
     # Bound before the try so the cleanup in ``finally`` cannot itself fail
     # with an unbound name when mkstemp raises.
     eggpath = None
     try:
         prefix = '%s-nover-' % (project)
         fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
         logger.debug('tmp egg file saved to %s', eggpath)
         # The context manager closes the handle even if the copy fails, so
         # the file can be removed afterwards.
         with os.fdopen(fd, 'wb') as lf:
             eggf.seek(0)
             shutil.copyfileobj(eggf, lf)
         try:
             # next() works on both Python 2.6+ and Python 3 iterators.
             d = next(pkg_resources.find_distributions(eggpath))
         except StopIteration:
             raise ValueError("Unknown or corrupt egg")
         return [str(x) for x in d.requires()]
     finally:
         if eggpath:
             os.remove(eggpath)
Ejemplo n.º 8
0
def application(config):
    """Assemble the 'Scrapyd' application, honouring a ``PORT`` environment override.

    When ``PORT`` is present in the environment it replaces the configured
    ``http_port`` (default 6800) and the bind address is forced to
    '0.0.0.0'; otherwise ``bind_address`` is read from the config (default
    '127.0.0.1'). The launcher and web root classes come from the
    ``launcher`` and ``webroot`` options and are resolved with
    ``load_object``.

    Returns the application with its components registered and the launcher,
    poll timer and web service attached as child services.
    """
    app = Application("Scrapyd")

    http_port = config.getint('http_port', 6800)
    if 'PORT' in os.environ:
        http_port = int(os.environ.get('PORT'))
        bind_address = '0.0.0.0'
    else:
        bind_address = config.get('bind_address', '127.0.0.1')

    poll_interval = config.getfloat('poll_interval', 5)

    # Build every component first, then register them in one pass.
    poller = QueuePoller(config)
    components = [
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    ]
    for interface, component in components:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    webroot_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))

    timer = TimerService(poll_interval, poller.poll)
    site = server.Site(webroot_cls(config, app))
    webservice = TCPServer(http_port, site, interface=bind_address)
    log.msg(format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
            bind_address=bind_address, http_port=http_port)

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
Ejemplo n.º 9
0
    def test_egg(self, eggf):
        """Store the egg in a scratch directory, install its requirements and list its spiders.

        Returns a ``Future`` that resolves to the result of ``spider_list``,
        or carries the exception raised by either the pip install step or
        the spider listing step.

        The scratch directory is remembered in ``self.temp_dir`` and is not
        removed by this method.
        """
        outcome = Future()
        # NOTE(review): the first positional argument of tempfile.mkdtemp is
        # ``suffix``, so this string ends the directory name rather than
        # starting it -- confirm whether ``prefix`` was intended.
        scratch_dir = tempfile.mkdtemp('scrapydd-egg-%s' % self.project_name)
        self.temp_dir = scratch_dir

        eggf.seek(0)
        storage_config = scrapyd.config.Config(
            {'eggs_dir': os.path.join(scratch_dir, 'eggs')})
        storage = FilesystemEggStorage(storage_config)
        storage.put(eggf, project=self.project_name, version='1')
        eggf.seek(0)

        requirements = self._read_egg_requirements(eggf) + ['scrapyd']

        def forward_failure(step_future):
            # Copy a step's exception, if any, onto the outcome future;
            # return True when the step failed.
            error = step_future.exception()
            if error is None:
                return False
            outcome.set_exception(error)
            return True

        def on_spiders_listed(step_future):
            logger.debug('after_spider_list')
            if forward_failure(step_future):
                return
            outcome.set_result(step_future.result())

        def on_requirements_installed(step_future):
            logger.debug('after_pip_install')
            if forward_failure(step_future):
                return
            listing = self.spider_list(self.project_name, cwd=scratch_dir)
            listing.add_done_callback(on_spiders_listed)

        install = self.pip_install(requirements)
        install.add_done_callback(on_requirements_installed)

        return outcome
Ejemplo n.º 10
0
 def setUp(self):
     """Create the egg storage under test, using the path from ``self.mktemp()`` as ``eggs_dir``."""
     eggs_dir = self.mktemp()
     self.eggst = FilesystemEggStorage(Config(values={'eggs_dir': eggs_dir}))
Ejemplo n.º 11
0
import os
from scrapyd.eggstorage import FilesystemEggStorage
from scrapyd.config import Config
import urllib2
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
register_openers()

# Copy the egg returned by the local egg storage for each project to another
# scrapyd instance by POSTing it to that instance's addversion.json endpoint.
source_dir = '/kf/scrapyd'

dest_url = 'http://localhost:6801/addversion.json'

source_eggs_dir = os.path.join(source_dir, 'eggs')
source_config = Config({'eggs_dir': source_eggs_dir})
source_egg_storage = FilesystemEggStorage(source_config)

# Each entry of the eggs directory is treated as a project name.
for project in os.listdir(source_eggs_dir):
    version, egg = source_egg_storage.get(project)
    print('%s %s' % (project, version))
    if egg is None:
        # Nothing to upload: the storage returned no egg for this project.
        continue
    try:
        post_data = {
            'egg': egg,
            'project': project,
            'version': version,
        }
        datagen, headers = multipart_encode(post_data)
        request = urllib2.Request(url=dest_url, headers=headers, data=datagen)
        try:
            res = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            # Report the failure and carry on with the next project.
            print('HTTPError: %s' % e)
        else:
            res.close()
    finally:
        # Release the egg file handle whether or not the upload succeeded.
        egg.close()