Beispiel #1
0
def create_root(config):
    from scrapy import log
    from scrapy.settings import CrawlerSettings
    from slyd.crawlerspec import (CrawlerSpecManager,
                                  create_crawler_spec_resource)
    from slyd.bot import create_bot_resource
    import slyd.settings
    from slyd.projects import ProjectsResource

    root = Resource()
    root.putChild("static", File(config['docroot']))

    crawler_settings = CrawlerSettings(settings_module=slyd.settings)
    spec_manager = CrawlerSpecManager(crawler_settings)

    # add project management at /projects
    projects = ProjectsResource(crawler_settings)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    log.msg("Slybot specs loading from %s/[PROJECT]" % spec_manager.basedir,
            level=log.DEBUG)
    projects.putChild("bot", create_bot_resource(spec_manager))

    # add spec at /projects/PROJECT_ID/spec
    spec = create_crawler_spec_resource(spec_manager)
    projects.putChild("spec", spec)
    return root
Beispiel #2
0
def create_root(config):
    from scrapy import log
    from scrapy.settings import CrawlerSettings
    from slyd.crawlerspec import (CrawlerSpecManager,
        create_crawler_spec_resource)
    from slyd.bot import create_bot_resource
    import slyd.settings
    from slyd.projects import ProjectsResource

    root = Resource()
    root.putChild("static", File(config['docroot']))

    crawler_settings = CrawlerSettings(settings_module=slyd.settings)
    spec_manager = CrawlerSpecManager(crawler_settings)

    # add project management at /projects
    projects = ProjectsResource(crawler_settings)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    log.msg("Slybot specs loading from %s/[PROJECT]" % spec_manager.basedir,
        level=log.DEBUG)
    projects.putChild("bot", create_bot_resource(spec_manager))

    # add spec at /projects/PROJECT_ID/spec
    spec = create_crawler_spec_resource(spec_manager)
    projects.putChild("spec", spec)
    return root
Beispiel #3
0
def create_root(config):
    from scrapy import log
    from scrapy.settings import Settings
    from .specmanager import SpecManager
    from .authmanager import AuthManager
    from .projectspec import create_project_resource
    from slyd.bot import create_bot_resource
    from slyd.projects import create_projects_manager_resource

    import slyd.settings

    root = Resource()
    root.putChild("static", File(config['docroot']))

    settings = Settings()
    settings.setmodule(slyd.settings)
    spec_manager = SpecManager(settings)

    # add server capabilities at /server_capabilities
    capabilities = Capabilities(spec_manager)
    root.putChild('server_capabilities', capabilities)

    # add projects manager at /projects
    projects = create_projects_manager_resource(spec_manager)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    projects.putChild("bot", create_bot_resource(spec_manager))

    # add project spec at /projects/PROJECT_ID/spec
    spec = create_project_resource(spec_manager)
    projects.putChild("spec", spec)

    auth_manager = AuthManager(settings)
    return auth_manager.protectResource(root)
Beispiel #4
0
def create_root(config, settings_module):
    from scrapy.settings import Settings
    from .specmanager import SpecManager
    from .authmanager import AuthManager
    from .projectspec import create_project_resource
    from slyd.bot import create_bot_resource
    from slyd.projects import create_projects_manager_resource

    from slyd.splash.ferry import (FerryServerProtocol, FerryServerFactory,
                                   create_ferry_resource)
    from slyd.splash.proxy import ProxyResource

    root = Resource()
    static = Resource()
    for file_name in listdir(config['docroot']):
        file_path = join(config['docroot'], file_name)
        if isfile(file_path):
            static.putChild(file_name, File(file_path))
    static.putChild('main.html', File(join(config['docroot'], 'index.html')))

    root.putChild('static', static)
    root.putChild('assets', File(join(config['docroot'], 'assets')))
    root.putChild('fonts', File(join(config['docroot'], 'assets', 'fonts')))
    root.putChild('', File(join(config['docroot'], 'index.html')))

    settings = Settings()
    settings.setmodule(settings_module)
    spec_manager = SpecManager(settings)

    # add server capabilities at /server_capabilities
    capabilities = Capabilities(spec_manager)
    root.putChild('server_capabilities', capabilities)

    # add projects manager at /projects
    projects = create_projects_manager_resource(spec_manager)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    projects.putChild('bot', create_bot_resource(spec_manager))

    # add project spec at /projects/PROJECT_ID/spec
    spec = create_project_resource(spec_manager)
    projects.putChild('spec', spec)

    # add websockets for communicating with splash
    factory = FerryServerFactory("ws://127.0.0.1:%s" % config['port'],
                                 debug=False,
                                 assets=config['docroot'])
    factory.protocol = FerryServerProtocol
    factory.setProtocolOptions(allowHixie76=True)
    websocket = create_ferry_resource(spec_manager, factory)
    root.putChild("ws", websocket)

    root.putChild('proxy', ProxyResource())

    auth_manager = AuthManager(settings)
    return auth_manager.protectResource(root)
Beispiel #5
0
def create_root(config, settings_module):
    from scrapy.settings import Settings
    from .specmanager import SpecManager
    from .authmanager import AuthManager
    from .projectspec import create_project_resource
    from slyd.bot import create_bot_resource
    from slyd.projects import create_projects_manager_resource

    from slyd.splash.ferry import (FerryServerProtocol, FerryServerFactory,
                                   create_ferry_resource)
    from slyd.splash.proxy import ProxyResource

    root = Resource()
    static = Resource()
    for file_name in listdir(config['docroot']):
        file_path = join(config['docroot'], file_name)
        if isfile(file_path):
            static.putChild(file_name, File(file_path))
    static.putChild('main.html', File(join(config['docroot'], 'index.html')))

    root.putChild('static', static)
    root.putChild('assets', File(join(config['docroot'], 'assets')))
    root.putChild('fonts', File(join(config['docroot'], 'assets', 'fonts')))
    root.putChild('', File(join(config['docroot'], 'index.html')))

    settings = Settings()
    settings.setmodule(settings_module)
    spec_manager = SpecManager(settings)

    # add server capabilities at /server_capabilities
    capabilities = Capabilities(spec_manager)
    root.putChild('server_capabilities', capabilities)

    # add projects manager at /projects
    projects = create_projects_manager_resource(spec_manager)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    projects.putChild('bot', create_bot_resource(spec_manager))

    # add project spec at /projects/PROJECT_ID/spec
    spec = create_project_resource(spec_manager)
    projects.putChild('spec', spec)

    # add websockets for communicating with splash
    factory = FerryServerFactory("ws://127.0.0.1:%s" % config['port'],
                                 debug=False,
                                 assets=config['docroot'])
    factory.protocol = FerryServerProtocol
    factory.setProtocolOptions(allowHixie76=True)
    websocket = create_ferry_resource(spec_manager, factory)
    root.putChild("ws", websocket)

    root.putChild('proxy', ProxyResource())

    auth_manager = AuthManager(settings)
    return auth_manager.protectResource(root)
Beispiel #6
0
    def setUp(self):
        # configure bot resource
        sm = test_spec_manager()
        self.bot_resource = create_bot_resource(sm)
        self.botsite = TestSite(self.bot_resource)

        # configure fake website to crawl
        docroot = join(RESOURCE_DIR, 'docroot')
        factory = Site(File(docroot))
        self.listen_port = reactor.listenTCP(8997, factory)
Beispiel #7
0
    def setUp(self):
        # configure bot resource
        sm = create_spec_manager()
        self.bot_resource = create_bot_resource(sm)
        self.botsite = TestSite(self.bot_resource)

        # configure fake website to crawl
        docroot = join(RESOURCE_DIR, 'docroot')
        factory = Site(File(docroot))
        self.listen_port = reactor.listenTCP(8997, factory)
Beispiel #8
0
def create_root(config):
    from scrapy import log
    from scrapy.settings import Settings
    from .specmanager import SpecManager
    from .authmanager import AuthManager
    from .projectspec import create_project_resource
    from slyd.bot import create_bot_resource
    from slyd.projects import create_projects_manager_resource

    import slyd.settings

    root = Resource()
    static = Resource()
    for file_name in listdir(config['docroot']):
        file_path = join(config['docroot'], file_name)
        if isfile(file_path):
            static.putChild(file_name, File(file_path))
    static.putChild('main.html', File(join(config['docroot'], 'index.html')))

    root.putChild('static', static)
    root.putChild('assets', File(join(config['docroot'], 'assets')))
    root.putChild('fonts', File(join(config['docroot'], 'assets', 'fonts')))
    root.putChild('', File(join(config['docroot'], 'index.html')))

    settings = Settings()
    settings.setmodule(slyd.settings)
    spec_manager = SpecManager(settings)

    # add server capabilities at /server_capabilities
    capabilities = Capabilities(spec_manager)
    root.putChild('server_capabilities', capabilities)

    # add projects manager at /projects
    projects = create_projects_manager_resource(spec_manager)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    projects.putChild('bot', create_bot_resource(spec_manager))

    # add project spec at /projects/PROJECT_ID/spec
    spec = create_project_resource(spec_manager)
    projects.putChild('spec', spec)

    auth_manager = AuthManager(settings)
    return auth_manager.protectResource(root)
Beispiel #9
0
def create_root(config):
    from scrapy import log
    from scrapy.settings import Settings
    from .specmanager import SpecManager
    from .authmanager import AuthManager
    from .projectspec import create_project_resource
    from slyd.bot import create_bot_resource
    from slyd.projects import create_projects_manager_resource

    import slyd.settings

    root = Resource()
    static = Resource()
    for file_name in listdir(config['docroot']):
        file_path = join(config['docroot'], file_name)
        if isfile(file_path):
            static.putChild(file_name, File(file_path))
    static.putChild('main.html', File(join(config['docroot'], 'index.html')))

    root.putChild('static', static)
    root.putChild('assets', File(join(config['docroot'], 'assets')))
    root.putChild('fonts', File(join(config['docroot'], 'assets', 'fonts')))
    root.putChild('', File(join(config['docroot'], 'index.html')))

    settings = Settings()
    settings.setmodule(slyd.settings)
    spec_manager = SpecManager(settings)

    # add server capabilities at /server_capabilities
    capabilities = Capabilities(spec_manager)
    root.putChild('server_capabilities', capabilities)

    # add projects manager at /projects
    projects = create_projects_manager_resource(spec_manager)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    projects.putChild('bot', create_bot_resource(spec_manager))

    # add project spec at /projects/PROJECT_ID/spec
    spec = create_project_resource(spec_manager)
    projects.putChild('spec', spec)

    auth_manager = AuthManager(settings)
    return auth_manager.protectResource(root)