Esempio n. 1
0
    def __init__(self, worker_cls):
        """Set up the runner and build the worker it drives.

        :param worker_cls: class to instantiate; it is called with the
            loaded config as its only argument.
        """
        self._running = True

        # TEMP_SLEEP_CHECK overrides the default value of 30
        interval = os.environ.get('TEMP_SLEEP_CHECK', 30)
        self.sleep_secs = int(interval)

        message = 'Worker: Running {0} every {1}'.format(worker_cls.__name__, self.sleep_secs)
        logger.info(message)

        self.worker = worker_cls(load_wr_config())
Esempio n. 2
0
    def __init__(self, worker_cls):
        """Create the worker instance and record how often it should run.

        :param worker_cls: worker class; instantiated with the loaded config.
        """
        self._running = True

        # The interval defaults to 30 unless TEMP_SLEEP_CHECK is set
        self.sleep_secs = int(os.environ.get('TEMP_SLEEP_CHECK', 30))

        worker_name = worker_cls.__name__
        logger.info(
            'Worker: Running {0} every {1}'.format(worker_name, self.sleep_secs)
        )

        wr_config = load_wr_config()
        self.worker = worker_cls(wr_config)
Esempio n. 3
0
def init():
    """Create the recorder, start its message listener and return its app.

    :return: the recorder's ``app`` object, with the recorder itself
        attached to it as the ``wr`` attribute.
    """
    recorder = WebRecRecorder(load_wr_config())

    # Run the message listen loop in a background greenlet
    gevent.spawn(recorder.msg_listen_loop)

    recorder.init_app(None)

    # Let the app reach back to the recorder that owns it
    recorder.app.wr = recorder
    return recorder.app
Esempio n. 4
0
def make_webagg():
    """Build the aggregator app exposing live, replay and patch routes.

    Reads REDIS_BASE_URL (required) and CACHE_PROXY_URL (optional) from the
    environment; the latter is also stored in the module-level PROXY_PREFIX.

    :return: the configured ResAggApp instance.
    """
    global PROXY_PREFIX

    settings = load_wr_config()

    webagg = ResAggApp(debug=True)

    base_url = os.environ['REDIS_BASE_URL'] + '/'

    # Redis key templates, prefixed with the base URL where a URL is needed
    rec_index_url = base_url + settings['cdxj_key_templ']
    coll_index_url = base_url + settings['cdxj_coll_key_templ']
    warc_url = base_url + settings['warc_key_templ']
    members_key = settings['rec_list_key_templ']

    proxy_url = os.environ.get('CACHE_PROXY_URL')
    PROXY_PREFIX = proxy_url

    rec_source = MountMultiKeyIndexSource(timeout=20.0, redis_url=rec_index_url)

    # The collection-level sources reuse the first source's Redis client
    coll_options = dict(timeout=20.0,
                        redis_url=coll_index_url,
                        redis=rec_source.redis,
                        member_key_templ=members_key)

    coll_source = MountMultiKeyIndexSource(**coll_options)
    mounts_source = MountMultiKeyIndexSource(mounts_only=True, **coll_options)

    live_sources = {
        'live': LiveIndexSource(),
        'mount': mounts_source,
    }
    live_handler = DefaultResourceHandler(SimpleAggregator(live_sources),
                                          warc_url, proxy_url)

    rec_aggregator = SimpleAggregator({'replay': rec_source})
    rec_handler = DefaultResourceHandler(rec_aggregator, warc_url, proxy_url)

    coll_aggregator = SimpleAggregator({'replay': coll_source})
    coll_handler = DefaultResourceHandler(coll_aggregator, warc_url, proxy_url)

    webagg.add_route('/live', live_handler)
    webagg.add_route('/replay', rec_handler)
    webagg.add_route('/replay-coll', coll_handler)
    # Patch: collection replay handler first, then the live handler
    webagg.add_route('/patch', HandlerSeq([coll_handler, live_handler]))

    return webagg
Esempio n. 5
0
def init():
    """Set up debug logging, create the recorder and return its app.

    :return: the recorder's ``app`` object, with the recorder itself
        attached to it as the ``wr`` attribute.
    """
    init_logging(debug=True)

    recorder = WebRecRecorder(load_wr_config())

    # spawn_once is a project helper; presumably it starts the listen loop
    # a single time only -- verify against its definition
    spawn_once(recorder.msg_listen_loop)

    recorder.init_app()

    # Let the app reach back to the recorder that owns it
    recorder.app.wr = recorder
    return recorder.app
Esempio n. 6
0
def init():
    """Set up logging, create the recorder and return its app.

    The logging debug flag is parsed with ``literal_eval`` from the WR_DEBUG
    environment variable, which defaults to the string 'True'.

    :return: the recorder's ``app`` object, with the recorder itself
        attached to it as the ``wr`` attribute.
    """
    debug_setting = os.environ.get('WR_DEBUG', 'True')
    init_logging(debug=literal_eval(debug_setting))

    recorder = WebRecRecorder(load_wr_config())

    # spawn_once is a project helper; presumably it starts the listen loop
    # a single time only -- verify against its definition
    spawn_once(recorder.msg_listen_loop)

    recorder.init_app()

    # Let the app reach back to the recorder that owns it
    recorder.app.wr = recorder
    return recorder.app
Esempio n. 7
0
def init():
    """Set up logging, create the recorder and return its app.

    :return: the recorder's ``app`` object, with the recorder itself
        attached to it as the ``wr`` attribute.
    """
    init_logging()

    recorder = WebRecRecorder(load_wr_config())

    # spawn_once is a project helper; presumably it starts the listen loop
    # a single time only -- verify against its definition
    spawn_once(recorder.msg_listen_loop)

    recorder.init_app()

    # Let the app reach back to the recorder that owns it
    recorder.app.wr = recorder
    return recorder.app
Esempio n. 8
0
def run():
    """Run the temp checker forever, pausing between passes.

    The pause length is read from the TEMP_SLEEP_CHECK environment variable
    (default 30) and passed to ``time.sleep``. An error raised by a pass is
    printed and the loop carries on. KeyboardInterrupt and SystemExit are
    not intercepted, so the process can still be stopped.
    """
    import traceback

    config = load_wr_config()
    temp_checker = TempChecker(config)

    sleep_secs = int(os.environ.get('TEMP_SLEEP_CHECK', 30))

    print('Running temp delete check every {0}'.format(sleep_secs))
    while True:
        try:
            temp_checker()
        except Exception:
            # Best effort: report the failure and try again on the next pass
            traceback.print_exc()

        # Sleep after failed passes too, so a persistent error cannot turn
        # this into a busy loop that floods the output with tracebacks
        time.sleep(sleep_secs)
Esempio n. 9
0
    def __init__(self, redis_url=None):
        """Build a Redis-backed user manager.

        :param redis_url: Redis URL to connect to; when empty or omitted,
            the REDIS_BASE_URL environment variable is used instead.
        """
        wr_config = load_wr_config()

        self.base_access = BaseAccess()

        # Fall back to the environment when no URL was supplied
        redis_url = redis_url or os.environ['REDIS_BASE_URL']

        redis_conn = redis.StrictRedis.from_url(redis_url, decode_responses=True)

        # Cork instance built on the same Redis connection
        auth = WebRecCork.create_cork(redis_conn, wr_config)

        super(CLIUserManager, self).__init__(redis=redis_conn,
                                             cork=auth,
                                             config=wr_config)
Esempio n. 10
0
def init_manager_for_cli():
    """Create a Redis-backed data manager for command-line use.

    The Redis URL is taken from the REDIS_BASE_URL environment variable.

    :return: a CLIRedisDataManager with a ``fake_session`` attribute set.
    """
    wr_config = load_wr_config()

    base_url = os.environ['REDIS_BASE_URL']
    redis_conn = redis.StrictRedis.from_url(base_url, decode_responses=True)

    # Cork instance built on the same Redis connection
    auth = WebRecCork.create_cork(redis_conn, wr_config)

    cli_manager = CLIRedisDataManager(redis_conn, auth, None, None, None, wr_config)

    # Attach a session object created with an {'anon': True} entry
    cli_manager.fake_session = Session(auth, {}, '', {'anon': True}, -1, False)

    return cli_manager
Esempio n. 11
0
    def __init__(self, redis_url=None):
        """Initialise the user manager on top of a Redis connection.

        :param redis_url: optional Redis URL; a falsy value means the
            REDIS_BASE_URL environment variable is read instead.
        """
        wr_config = load_wr_config()

        self.base_access = BaseAccess()

        # Resolve the connection URL, preferring the explicit argument
        target_url = redis_url if redis_url else os.environ['REDIS_BASE_URL']
        redis_conn = redis.StrictRedis.from_url(target_url, decode_responses=True)

        # Cork instance built on the same Redis connection
        auth = WebRecCork.create_cork(redis_conn, wr_config)

        parent_kwargs = dict(redis=redis_conn, cork=auth, config=wr_config)
        super(CLIUserManager, self).__init__(**parent_kwargs)
def init():
    """Set up logging, create the recorder and return its app.

    The message listen loop is started as a greenlet. When ``postfork`` is
    available the start is deferred to a ``@postfork`` hook and only happens
    if ``uwsgi.mule_id()`` is 0; otherwise it is started immediately.

    :return: the recorder's ``app`` object, with the recorder itself
        attached to it as the ``wr`` attribute.
    """
    init_logging()

    recorder = WebRecRecorder(load_wr_config())

    if not postfork:
        # No postfork decorator available: start listening right away
        gevent.spawn(recorder.msg_listen_loop)
    else:

        @postfork
        def listen_loop():
            # NOTE(review): mule_id() == 0 presumably means "not a mule
            # process" -- confirm against the uWSGI documentation
            if uwsgi.mule_id() == 0:
                gevent.spawn(recorder.msg_listen_loop)

    recorder.init_app(None)

    # Let the app reach back to the recorder that owns it
    recorder.app.wr = recorder
    return recorder.app
Esempio n. 13
0
def run():
    """Run the storage committer forever, pausing between passes.

    Each pass calls the committer, sleeps for TEMP_SLEEP_CHECK (environment
    variable, default 30) and then publishes an empty 'close_idle' message
    through the committer's Redis client. An error raised by a pass is
    printed and the loop carries on after a pause. KeyboardInterrupt and
    SystemExit are not intercepted, so the process can still be stopped.
    """
    import traceback

    sleep_secs = int(os.environ.get('TEMP_SLEEP_CHECK', 30))
    print('Running storage committer {0}'.format(sleep_secs))

    from webrecorder.rec.s3 import S3Storage

    config = load_wr_config()

    storage_committer = StorageCommitter(config)
    storage_committer.add_storage_class('s3', S3Storage)

    while True:
        try:
            storage_committer()
            time.sleep(sleep_secs)

            storage_committer.redis.publish('close_idle', '')
        except Exception:
            # Best effort: report the failure and keep the loop alive
            traceback.print_exc()

            # Back off before retrying so a persistent error cannot turn
            # this into a busy loop that floods the output with tracebacks
            time.sleep(sleep_secs)
Esempio n. 14
0
def make_webagg():
    """Assemble the aggregator app with live, replay and patch routes.

    Requires REDIS_BASE_URL in the environment. CACHE_PROXY_URL is optional
    and is also stored on ``AitFilterIndexSource.PROXY_PREFIX``.

    :return: the configured ResAggApp instance.
    """
    settings = load_wr_config()

    webagg = ResAggApp(debug=True)

    base_url = os.environ['REDIS_BASE_URL'] + '/'

    # Redis key templates prefixed with the base URL
    rec_index_url = base_url + settings['cdxj_key_templ']
    coll_index_url = base_url + settings['cdxj_coll_key_templ']
    warc_url = base_url + settings['warc_key_templ']

    proxy_url = os.environ.get('CACHE_PROXY_URL')
    AitFilterIndexSource.PROXY_PREFIX = proxy_url

    rec_source = RedisIndexSource(rec_index_url)
    # The collection source below reuses this source's Redis client
    shared_redis = rec_source.redis

    live_aggregator = SimpleAggregator({'live': LiveIndexSource()})
    live_handler = DefaultResourceHandler(live_aggregator, warc_url, proxy_url)

    rec_aggregator = SimpleAggregator({'replay': rec_source})
    rec_handler = DefaultResourceHandler(rec_aggregator, warc_url, proxy_url)

    coll_source = MountMultiKeyIndexSource(timeout=20.0,
                                           redis_url=coll_index_url,
                                           redis=shared_redis)
    coll_aggregator = SimpleAggregator({'replay': coll_source})
    coll_handler = DefaultResourceHandler(coll_aggregator, warc_url, proxy_url)

    webagg.add_route('/live', live_handler)
    webagg.add_route('/replay', rec_handler)
    webagg.add_route('/replay-coll', coll_handler)
    # Patch: collection replay handler first, then the live handler
    webagg.add_route('/patch', HandlerSeq([coll_handler, live_handler]))

    return webagg
Esempio n. 15
0
    def __init__(self):
        """Build the WARC server app and register its routes.

        Reads REDIS_BASE_URL (required) and CACHE_PROXY_URL (optional,
        default '') from the environment; the latter is also stored in the
        module-level PROXY_PREFIX. The finished app is kept on ``self.app``.
        """
        global PROXY_PREFIX

        init_logging()

        settings = load_wr_config()

        server = BaseWarcServer(debug=True)

        base_url = os.environ['REDIS_BASE_URL'] + '/'

        # Key templates are taken from the model classes, not from the config
        rec_index_url = base_url + Recording.CDXJ_KEY
        coll_index_url = base_url + Collection.COLL_CDXJ_KEY
        warc_url = base_url + Recording.COLL_WARC_KEY
        members_key = Collection.RECS_KEY

        resolver = RedisResolver(redis_url=warc_url,
                                 member_key_templ=members_key)
        # The index sources below reuse the resolver's Redis client
        shared_redis = resolver.redis
        resolvers = [resolver]

        proxy_url = os.environ.get('CACHE_PROXY_URL', '')
        PROXY_PREFIX = proxy_url

        timeout = 20.0

        def make_handler(aggregator):
            # Every handler shares the same resolver list and cache proxy URL
            return DefaultResourceHandler(aggregator, resolvers, proxy_url)

        def make_redis_source(index_url):
            # Redis-backed index source using the shared client and timeout
            return RedisIndexSource(timeout=timeout,
                                    redis_url=index_url,
                                    redis=shared_redis)

        rec_source = make_redis_source(rec_index_url)
        coll_source = make_redis_source(coll_index_url)

        live_handler = make_handler(SimpleAggregator({'live': LiveIndexSource()}))

        # Extractable archives (all available)
        loader = WAMSourceLoader(memento_cls=ProxyMementoIndexSource,
                                 remote_cls=ProxyRemoteIndexSource,
                                 wb_memento_cls=ProxyWBMementoIndexSource)
        all_archives = loader.sources

        # Extract source: aggregate over every extractable archive
        primary_handler = make_handler(
            GeventTimeoutAggregator(all_archives, timeout=timeout))

        # Patch sources: the filtered archives plus live. The dict returned
        # by filter_archives is extended in place (no copy is taken).
        patch_sources = self.filter_archives(all_archives,
                                             settings['patch_archives_index'])
        patch_sources['live'] = LiveIndexSource()

        inverted_aggregator = GeventTimeoutAggregator(patch_sources,
                                                      timeout=timeout,
                                                      sources_key='inv_sources',
                                                      invert_sources=True)
        other_handler = make_handler(inverted_aggregator)

        patch_handler = make_handler(
            GeventTimeoutAggregator(patch_sources, timeout=timeout))

        # Single recording replay
        rec_handler = make_handler(SimpleAggregator({'local': rec_source}))

        # Collection replay
        coll_handler = make_handler(SimpleAggregator({'local': coll_source}))

        server.add_route('/live', live_handler)
        server.add_route('/extract',
                         HandlerSeq([primary_handler, other_handler, rec_handler]))
        server.add_route('/replay', rec_handler)
        server.add_route('/replay-coll', coll_handler)
        server.add_route('/patch', HandlerSeq([coll_handler, patch_handler]))

        self.app = server
Esempio n. 16
0
    def __init__(self, redis_url=None):
        """Wire up the bottle app, Redis clients, managers and controllers.

        :param redis_url: URL for the main Redis connection; when falsy the
            REDIS_BASE_URL environment variable is used instead.

        Also requires REDIS_BROWSER_URL and REDIS_SESSION_URL in the
        environment. The bottle app wrapped in the session middleware is
        handed to the parent constructor as the final app.
        """
        self._init_logging()

        # When running frozen, static files are located under sys._MEIPASS;
        # otherwise they are resolved through the installed package
        if getattr(sys, 'frozen', False):
            self.static_root = os.path.join(sys._MEIPASS, 'webrecorder',
                                            'static/')
        else:
            self.static_root = resource_filename('webrecorder', 'static/')

        bottle_app = Bottle()
        self.bottle_app = bottle_app

        # JSON encoding for datetime objects
        self.bottle_app.install(
            JSONPlugin(
                json_dumps=lambda s: json.dumps(s, cls=CustomJSONEncoder)))

        config = load_wr_config()

        # Init Redis
        if not redis_url:
            redis_url = os.environ['REDIS_BASE_URL']

        # Only the browser connection is created with decode_responses=True
        self.redis = redis.StrictRedis.from_url(redis_url)
        self.browser_redis = redis.StrictRedis.from_url(
            os.environ['REDIS_BROWSER_URL'], decode_responses=True)
        self.session_redis = redis.StrictRedis.from_url(
            os.environ['REDIS_SESSION_URL'])

        # Init Jinja
        jinja_env = self.init_jinja_env(config)

        # Init Content Loader/Rewriter
        content_app = ContentController(app=bottle_app,
                                        jinja_env=jinja_env,
                                        config=config,
                                        redis=self.redis)

        # Init Browser Mgr
        self.browser_mgr = BrowserManager(config, self.browser_redis,
                                          content_app)

        # Init Cork
        self.cork = WebRecCork.create_cork(self.redis, config)

        # Init Manager
        manager = RedisDataManager(self.redis, self.cork, content_app,
                                   self.browser_redis, self.browser_mgr,
                                   config)

        # Init Session temp_prefix
        Session.temp_prefix = config['temp_prefix']

        # Init Core app controllers
        # Each controller is instantiated with the shared bottle app; the
        # instance bound to `x` is not used afterwards in this method
        for controller_type in self.ALL_CONTROLLERS:
            x = controller_type(app=bottle_app,
                                jinja_env=jinja_env,
                                manager=manager,
                                config=config)

        # Set Error Handler
        # The previous default handler is passed in to make_err_handler
        bottle_app.default_error_handler = self.make_err_handler(
            bottle_app.default_error_handler)

        # Wrap the bottle app in the session middleware; the wrapped app is
        # what the parent constructor receives
        final_app = RedisSessionMiddleware(bottle_app, self.cork,
                                           self.session_redis, config)

        super(AppController, self).__init__(final_app, jinja_env, manager,
                                            config)
    def __init__(self):
        """Build the WARC server app and register its routes.

        Reads REDIS_BASE_URL (required) and CACHE_PROXY_URL (optional,
        default '') from the environment; the latter is also stored in the
        module-level PROXY_PREFIX. The finished app is kept on ``self.app``.
        """
        global PROXY_PREFIX

        init_logging()

        settings = load_wr_config()

        server = BaseWarcServer(debug=True)

        base_url = os.environ['REDIS_BASE_URL'] + '/'

        # Redis key templates from the config, prefixed where a URL is needed
        rec_index_url = base_url + settings['cdxj_key_templ']
        coll_index_url = base_url + settings['coll_cdxj_key_templ']
        warc_url = base_url + settings['warc_key_templ']
        members_key = settings['rec_list_key_templ']

        resolver = RedisResolver(redis_url=warc_url,
                                 member_key_templ=members_key)
        # The index sources below reuse the resolver's Redis client
        shared_redis = resolver.redis
        resolvers = [resolver]

        proxy_url = os.environ.get('CACHE_PROXY_URL', '')
        PROXY_PREFIX = proxy_url

        timeout = 20.0

        def make_handler(aggregator):
            # Every handler shares the same resolver list and cache proxy URL
            return DefaultResourceHandler(aggregator, resolvers, proxy_url)

        def make_redis_source(index_url):
            # Redis-backed index source using the shared client and timeout
            return RedisIndexSource(timeout=timeout,
                                    redis_url=index_url,
                                    redis=shared_redis)

        rec_source = make_redis_source(rec_index_url)
        coll_source = make_redis_source(coll_index_url)

        live_handler = make_handler(SimpleAggregator({'live': LiveIndexSource()}))

        # Extractable archives (all available)
        loader = WAMSourceLoader(memento_cls=ProxyMementoIndexSource,
                                 remote_cls=ProxyRemoteIndexSource,
                                 wb_memento_cls=ProxyWBMementoIndexSource)
        all_archives = loader.sources

        # Extract source: aggregate over every extractable archive
        primary_handler = make_handler(
            GeventTimeoutAggregator(all_archives, timeout=timeout))

        # Patch sources: the filtered archives plus live. The dict returned
        # by filter_archives is extended in place (no copy is taken).
        patch_sources = self.filter_archives(all_archives,
                                             settings['patch_archives_index'])
        patch_sources['live'] = LiveIndexSource()

        inverted_aggregator = GeventTimeoutAggregator(patch_sources,
                                                      timeout=timeout,
                                                      sources_key='inv_sources',
                                                      invert_sources=True)
        other_handler = make_handler(inverted_aggregator)

        patch_handler = make_handler(
            GeventTimeoutAggregator(patch_sources, timeout=timeout))

        # Single recording replay
        rec_handler = make_handler(SimpleAggregator({'local': rec_source}))

        # Collection replay
        coll_handler = make_handler(SimpleAggregator({'local': coll_source}))

        server.add_route('/live', live_handler)
        server.add_route('/extract',
                         HandlerSeq([primary_handler, other_handler, rec_handler]))
        server.add_route('/replay', rec_handler)
        server.add_route('/replay-coll', coll_handler)
        server.add_route('/patch', HandlerSeq([coll_handler, patch_handler]))

        self.app = server
Esempio n. 18
0
    def __init__(self, redis_url=None):
        """Wire up the bottle app, Redis clients, managers and controllers.

        :param redis_url: URL for the main Redis connection; when falsy the
            REDIS_BASE_URL environment variable is used instead.

        Also requires REDIS_BROWSER_URL and REDIS_SESSION_URL in the
        environment; CONTENT_ERROR_REDIRECT is optional. The bottle app is
        wrapped in the session and proxy middlewares before being handed to
        the parent constructor, and the API spec is built last.
        """
        self._init_logging()

        # When running frozen, static files are located under sys._MEIPASS;
        # otherwise they are resolved through the installed package
        if getattr(sys, 'frozen', False):
            self.static_root = os.path.join(sys._MEIPASS, 'webrecorder', 'static/')
        else:
            self.static_root = resource_filename('webrecorder', 'static/')

            # only launch if running in place, not from installed package
            if '.egg' not in __file__:
                spawn_once(default_build, worker=1, force_build=False)

        # Class-level assignment: applies to BaseRequest itself, not only
        # to this controller
        BaseRequest.MEMFILE_MAX = 500000 # 500kb

        bottle_app = APIBottle()
        self.bottle_app = bottle_app

        # JSON encoding for datetime objects
        # self.bottle_app.install(JSONPlugin(json_dumps=lambda s: json.dumps(s, cls=CustomJSONEncoder)))

        config = load_wr_config()

        # Init Redis
        if not redis_url:
            redis_url = os.environ['REDIS_BASE_URL']

        # The main and browser connections decode responses; the session
        # connection is created without decode_responses
        self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
        browser_redis = redis.StrictRedis.from_url(os.environ['REDIS_BROWSER_URL'], decode_responses=True)

        session_redis = redis.StrictRedis.from_url(os.environ['REDIS_SESSION_URL'])

        # Optional; None when the variable is not set
        self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')

        # Init Jinja
        jinja_env = self.init_jinja_env(config)

        # Init Cork
        cork = WebRecCork.create_cork(self.redis, config)

        # User Manager
        user_manager = UserManager(redis=self.redis,
                                   cork=cork,
                                   config=config)

        # Init Browser Mgr
        browser_mgr = BrowserManager(config, browser_redis, user_manager)

        # Init Dat Share
        # Stored as a class attribute on DatShare
        DatShare.dat_share = DatShare(self.redis)

        # Init Content Loader/Rewriter
        content_app = ContentController(app=bottle_app,
                                        jinja_env=jinja_env,
                                        user_manager=user_manager,
                                        config=config,
                                        browser_mgr=browser_mgr,
                                        redis=self.redis,
                                        cork=cork)

        # Init Session temp_prefix
        Session.temp_prefix = config['temp_prefix']

        # Shared keyword arguments: passed to every controller below and,
        # with 'app' replaced by the wrapped app, to the parent constructor
        kwargs = dict(app=bottle_app,
                      jinja_env=jinja_env,
                      user_manager=user_manager,
                      browser_mgr=browser_mgr,
                      content_app=content_app,
                      cork=cork,
                      redis=self.redis,
                      session_redis=session_redis,
                      config=config)

        # Init Core app controllers
        # The instance bound to `x` is not used afterwards in this method
        for controller_type in self.ALL_CONTROLLERS:
            x = controller_type(**kwargs)

        # Set Error Handler
        # The previous default handler is passed in to make_err_handler
        bottle_app.default_error_handler = self.make_err_handler(
                                            bottle_app.default_error_handler)

        # Wrap the bottle app in the session middleware ...
        final_app = RedisSessionMiddleware(bottle_app,
                                           cork,
                                           session_redis,
                                           config,
                                           access_cls=SessionAccessCache,
                                           access_redis=self.redis)

        # ... and then in the proxy middleware
        final_app = WSGIProxMiddleware(final_app, '/_proxy/',
                                       proxy_host='webrecorder.proxy',
                                       proxy_options=self._get_proxy_options())

        # The parent constructor receives the fully wrapped app, whereas the
        # controllers above were given the bare bottle app
        kwargs['app'] = final_app

        super(MainController, self).__init__(**kwargs)

        self.browser_mgr = browser_mgr
        self.content_app = content_app

        wr_api_spec.build_api_spec()