Beispiel #1
0
def create_perma_wb_router(config=None):
    """
        Configure server.

        Starts from the router returned by create_wb_router(config), puts a
        route matching GUID_REGEX at the front of its route list, and points
        both the router's error view and the new handler's not-found view at
        the 'archive/archive-error.html' template.

        This should do basically the same stuff as pywb.webapp.pywb_init.create_wb_router()

        :param config: configuration handed through to create_wb_router();
            a new empty dict is used when it is omitted.
        :return: the configured router.
    """
    # A literal `{}` default is created once and shared by every call, so any
    # mutation downstream would leak into later calls. Build it per call.
    if config is None:
        config = {}

    # start with the default PyWB router
    router = create_wb_router(config)

    # insert a custom route that knows how to play back based on GUID
    query_handler = QueryHandler.init_from_config(PermaCDXSource())
    handler_options = dict(
        archive_paths=[get_archive_path()],
        wb_handler_class=PermaGUIDHandler,
        buffer_response=True,
        # head_insert_html=os.path.join(os.path.dirname(__file__), 'head_insert.html'),
        enable_memento=True,
        redir_to_exact=False,
    )
    wb_handler = create_wb_handler(query_handler, handler_options)

    # replace the loader used by the handler's record loader with a CachedLoader
    wb_handler.replay.content_loader.record_loader.loader = CachedLoader()

    # index 0 so this route is consulted before the default routes
    router.routes.insert(0, PermaRoute(GUID_REGEX, wb_handler))

    # use our Django error view
    router.error_view = PermaTemplateView('archive/archive-error.html')
    wb_handler.not_found_view = router.error_view

    return router
Beispiel #2
0
def create_perma_wb_router(config=None):
    """
        Configure server.

        Takes the default router from create_wb_router(config), prepends a
        GUID_REGEX route backed by a PermaGUIDHandler-based handler, and uses
        the 'archive/archive-error.html' template for both the router's error
        view and the handler's not-found view.

        This should do basically the same stuff as pywb.webapp.pywb_init.create_wb_router()

        :param config: configuration passed on to create_wb_router(); when
            left out, a fresh empty dict is created for this call.
        :return: the configured router.
    """
    # Avoid a shared mutable default: `config={}` would be the same dict
    # object on every call.
    if config is None:
        config = {}

    # start with the default PyWB router
    router = create_wb_router(config)

    # insert a custom route that knows how to play back based on GUID
    wb_handler = create_wb_handler(
        QueryHandler.init_from_config(PermaCDXSource()),
        dict(
            archive_paths=[get_archive_path()],
            wb_handler_class=PermaGUIDHandler,
            buffer_response=True,
            # head_insert_html=os.path.join(os.path.dirname(__file__), 'head_insert.html'),
            enable_memento=True,
            redir_to_exact=False,
        ),
    )

    # swap in a CachedLoader as the record loader's underlying loader
    wb_handler.replay.content_loader.record_loader.loader = CachedLoader()

    # put the GUID route first in the route list
    route = PermaRoute(GUID_REGEX, wb_handler)
    router.routes.insert(0, route)

    # use our Django error view
    error_view = PermaTemplateView('archive/archive-error.html')
    router.error_view = error_view
    wb_handler.not_found_view = error_view

    return router
Beispiel #3
0
def init_collection(route_config):
    """
    Build the QueryHandler for a collection from its route configuration.

    Reads the optional 'domain_specific_rules' and 'server_cls' entries from
    route_config (each is None when absent), creates the 'query_html' view
    via init_view() with J2HtmlCapturesView, and passes all of them to
    QueryHandler.init_from_config().

    :param route_config: mapping-like configuration for the route.
    :return: whatever QueryHandler.init_from_config() returns.
    """
    rules_file = route_config.get('domain_specific_rules')
    captures_view = init_view(route_config, 'query_html', J2HtmlCapturesView)
    server_class = route_config.get('server_cls')

    return QueryHandler.init_from_config(
        route_config,
        rules_file,
        captures_view,
        server_class,
    )
Beispiel #4
0
def init_collection(route_config):
    """
    Create a query handler from a route's configuration.

    The handler is produced by QueryHandler.init_from_config(), which is
    given the configuration itself plus three values derived from it: the
    'domain_specific_rules' entry, the 'query_html' view created by
    init_view(), and the 'server_cls' entry. Missing entries are passed as
    None.

    :param route_config: mapping-like configuration for the route.
    :return: the object returned by QueryHandler.init_from_config().
    """
    # Arguments are collected in the order init_from_config() receives them.
    handler_args = (
        route_config.get('domain_specific_rules'),
        init_view(route_config, 'query_html', J2HtmlCapturesView),
        route_config.get('server_cls'),
    )

    return QueryHandler.init_from_config(route_config, *handler_args)
Beispiel #5
0
def test_excluded(testconfig):
    """
    Check that loading the CDX for the bookmark icon URL raises AccessException.

    :param testconfig: configuration handed to QueryHandler.init_from_config()
        (presumably a test fixture -- confirm against the test setup).
    """
    # Earlier hand-wired setup, kept for reference:
    # sources = testconfig.get('index_paths')
    # perms_policy = testconfig.get('perms_policy')

    # cdx_server = CDXServer(sources)
    # index_handler = IndexHandler(cdx_server, perms_policy=perms_policy)
    query_handler = QueryHandler.init_from_config(testconfig)

    url = "http://www.iana.org/_img/bookmark_icon.ico"

    params = dict(url=url)

    with raises(AccessException):
        cdxobjs = list(query_handler.load_cdx(None, params))
        # Only reached when the line above did not raise: show what came back.
        # Written as a call so it parses on Python 3 too; with one argument
        # the output is the same under Python 2's print statement.
        print(cdxobjs)
Beispiel #6
0
def test_excluded(testconfig):
    """
    Loading the CDX for the bookmark icon URL must raise AccessException.

    :param testconfig: configuration passed to QueryHandler.init_from_config().
    """
    # Previous manual wiring, left here for reference:
    #   sources = testconfig.get('index_paths')
    #   perms_policy = testconfig.get('perms_policy')
    #   cdx_server = CDXServer(sources)
    #   index_handler = IndexHandler(cdx_server, perms_policy=perms_policy)
    handler = QueryHandler.init_from_config(testconfig)

    query = {'url': 'http://www.iana.org/_img/bookmark_icon.ico'}

    with raises(AccessException):
        # list() forces the whole result to be consumed inside the block
        results = list(handler.load_cdx(None, query))
        # reached only if nothing was raised above
        print(results)
Beispiel #7
0
def create_perma_pywb_app(config):
    """
        Configure server.

        Builds a replay handler over the warc storage location and returns a
        Router with a single route for it.

        This should do basically the same stuff as pywb.webapp.pywb_init.create_wb_router()

        :param config: currently unused by this function.
        :return: a Router with one route, fixed host path / port, and an
            ErrorTemplateView as its error view.
    """
    # paths
    script_path = os.path.dirname(__file__)

    # Get root storage location for warcs.
    # archive_path should be the location pywb can find warcs, like 'file://generated/' or 'http://perma.s3.amazonaws.com/generated/'
    # We can get it by requesting the location of a blank file from default_storage.
    # default_storage may use disk or network storage depending on config, so we look for either a path() or url()
    try:
        archive_path = 'file://' + default_storage.path('') + '/'
    except NotImplementedError:
        # no local path available: fall back to the url, minus query params
        archive_path = default_storage.url('/').split('?', 1)[0]

    query_handler = QueryHandler.init_from_config(PermaCDXSource())

    # pywb template vars (used in templates called by pywb, such as head_insert.html, but not our ErrorTemplateView)
    add_env_globals({'static_path': settings.STATIC_URL})

    # use util func to create the handler
    handler_options = dict(
        archive_paths=[archive_path],
        wb_handler_class=Handler,
        buffer_response=True,
        head_insert_html=os.path.join(script_path, 'head_insert.html'),
        redir_to_exact=False,
    )
    wb_handler = create_wb_handler(query_handler, handler_options)

    # replace the loader used by the handler's record loader with a CachedLoader
    wb_handler.replay.content_loader.record_loader.loader = CachedLoader()

    # Routes are given as a list rather than a set literal: a list keeps a
    # defined order and supports positional operations such as insert().
    routes = [Route(r'([a-zA-Z0-9\-]+)', wb_handler)]

    # Finally, create wb router
    return Router(
        routes,
        # Specify hostnames that pywb will be running on
        # This will help catch occasionally missed rewrites that fall-through to the host
        # (See archivalrouter.ReferRedirect)
        hostpaths=['http://localhost:8000/'],
        port=8000,
        error_view=ErrorTemplateView(),
    )
Beispiel #8
0
def create_perma_wb_router(config=None):
    """
        Configure server.

        Builds a PermaGUIDHandler-based replay handler over the warc storage
        location, then takes the router from create_wb_router(config), sets
        its error view to the 'archive-error.html' template and puts a
        GUID_REGEX route for the handler at the front of its route list.

        This should do basically the same stuff as pywb.webapp.pywb_init.create_wb_router()

        :param config: configuration handed through to create_wb_router();
            a new empty dict is used when it is omitted.
        :return: the configured router.
    """
    # A literal `{}` default is one dict shared by every call; create a fresh
    # one per call so nothing can carry over between invocations.
    if config is None:
        config = {}

    # paths
    script_path = os.path.dirname(__file__)

    # Get root storage location for warcs.
    # archive_path should be the location pywb can find warcs, like 'file://generated/' or 'http://perma.s3.amazonaws.com/generated/'
    # We can get it by requesting the location of a blank file from default_storage.
    # default_storage may use disk or network storage depending on config, so we look for either a path() or url()
    try:
        archive_path = 'file://' + default_storage.path('') + '/'
    except NotImplementedError:
        # no local path available: fall back to the url, minus query params
        archive_path = default_storage.url('/').split('?', 1)[0]

    query_handler = QueryHandler.init_from_config(PermaCDXSource())

    # pywb template vars (used in templates called by pywb, such as head_insert.html, but not our ErrorTemplateView)
    add_env_globals({'static_path': settings.STATIC_URL})

    # use util func to create the handler
    handler_options = dict(
        archive_paths=[archive_path],
        wb_handler_class=PermaGUIDHandler,
        buffer_response=True,
        head_insert_html=os.path.join(script_path, 'head_insert.html'),
        enable_memento=True,
        redir_to_exact=False,
    )
    wb_handler = create_wb_handler(query_handler, handler_options)

    # replace the loader used by the handler's record loader with a CachedLoader
    wb_handler.replay.content_loader.record_loader.loader = CachedLoader()

    route = PermaRoute(GUID_REGEX, wb_handler)

    # default router, with our error view and the GUID route placed first
    router = create_wb_router(config)
    router.error_view = PermaTemplateView('archive-error.html')
    router.routes.insert(0, route)

    return router
Beispiel #9
0
def create_perma_pywb_app(config):
    """
        Configure server.

        Creates a query handler from settings.CDX_SERVER_URL, wraps it in a
        replay handler over the warc storage location, and returns an
        ArchivalRouter with a single route for that handler.

        :param config: currently unused by this function.
        :return: an archivalrouter.ArchivalRouter with one route and a fixed
            host path / port.
    """
    query_handler = QueryHandler.init_from_config(settings.CDX_SERVER_URL)

    # Get root storage location for warcs.
    # archive_path should be the location pywb can find warcs, like 'file://generated/' or 'http://perma.s3.amazonaws.com/generated/'
    # We can get it by requesting the location of a blank file from default_storage.
    # default_storage may use disk or network storage depending on config, so we look for either a path() or url()
    try:
        archive_path = 'file://' + default_storage.path('') + '/'
    except NotImplementedError:
        # no local path available: fall back to the url, minus query params
        archive_path = default_storage.url('/').split('?', 1)[0]

    # use util func to create the handler
    handler_options = dict(
        archive_paths=[archive_path],
        wb_handler_class=Handler,
        buffer_response=True,
        head_insert_html='ui/head_insert.html',
        template_globals={'static_path': 'static/js'},
        redir_to_exact=False,
    )
    wb_handler = create_wb_handler(query_handler, handler_options)

    # Routes are given as a list rather than a set literal: a list keeps a
    # defined order and supports positional operations such as insert().
    routes = [Route(r'([a-zA-Z0-9\-]+)', wb_handler)]

    # Finally, create wb router
    return archivalrouter.ArchivalRouter(
        routes,
        # Specify hostnames that pywb will be running on
        # This will help catch occasionally missed rewrites that fall-through to the host
        # (See archivalrouter.ReferRedirect)
        hostpaths=['http://localhost:8000/'],
        port=8000,
    )