def __init__(self, worker_cls):
    """Set up the runner state and build the worker it will drive.

    :param worker_cls: worker class; it is instantiated with the config
        returned by ``load_wr_config()`` and stored as ``self.worker``.
    """
    self._running = True

    # Interval is overridable through TEMP_SLEEP_CHECK (defaults to 30).
    interval = os.environ.get('TEMP_SLEEP_CHECK', 30)
    self.sleep_secs = int(interval)

    message = 'Worker: Running {0} every {1}'.format(worker_cls.__name__,
                                                     self.sleep_secs)
    logger.info(message)

    self.worker = worker_cls(load_wr_config())
def __init__(self, worker_cls):
    """Create the worker instance and record its run interval.

    :param worker_cls: class to instantiate; it receives the config
        returned by ``load_wr_config()`` as its only argument.
    """
    self._running = True
    self.sleep_secs = int(os.environ.get('TEMP_SLEEP_CHECK', 30))

    # Announce which worker class runs and at what interval.
    worker_name = worker_cls.__name__
    logger.info(
        'Worker: Running {0} every {1}'.format(worker_name, self.sleep_secs))

    wr_config = load_wr_config()
    self.worker = worker_cls(wr_config)
def init():
    """Create a ``WebRecRecorder`` and return its app.

    The recorder's ``msg_listen_loop`` is started via ``gevent.spawn``
    before the app is initialized. The recorder is also attached to the
    app as ``app.wr``.

    :return: ``wr.app`` of the newly created recorder.
    """
    recorder = WebRecRecorder(load_wr_config())

    # Start the message listen loop via gevent.spawn.
    gevent.spawn(recorder.msg_listen_loop)

    recorder.init_app(None)

    # Make the recorder reachable from the app object.
    recorder.app.wr = recorder
    return recorder.app
def make_webagg():
    """Assemble the ``ResAggApp`` with its live, replay and patch routes.

    Redis URLs are formed from the ``REDIS_BASE_URL`` environment variable
    plus key templates from the loaded config. The optional
    ``CACHE_PROXY_URL`` value is stored in the module-level
    ``PROXY_PREFIX`` and passed to every resource handler.

    :return: the configured ``ResAggApp`` instance.
    :raises KeyError: if ``REDIS_BASE_URL`` is not set in the environment.
    """
    global PROXY_PREFIX

    config = load_wr_config()
    app = ResAggApp(debug=True)

    redis_base = os.environ['REDIS_BASE_URL'] + '/'
    rec_url = redis_base + config['cdxj_key_templ']
    coll_url = redis_base + config['cdxj_coll_key_templ']
    warc_url = redis_base + config['warc_key_templ']
    rec_list_key = config['rec_list_key_templ']

    cache_proxy_url = os.environ.get('CACHE_PROXY_URL')
    PROXY_PREFIX = cache_proxy_url

    def build_handler(sources):
        # Every handler shares the same WARC url and cache proxy url.
        return DefaultResourceHandler(SimpleAggregator(sources),
                                      warc_url,
                                      cache_proxy_url)

    rec_source = MountMultiKeyIndexSource(timeout=20.0, redis_url=rec_url)

    # Reuse the redis client created by the recording source.
    shared_redis = rec_source.redis

    coll_source_args = dict(timeout=20.0,
                            redis_url=coll_url,
                            redis=shared_redis,
                            member_key_templ=rec_list_key)

    coll_source = MountMultiKeyIndexSource(**coll_source_args)
    mounts_source = MountMultiKeyIndexSource(mounts_only=True,
                                             **coll_source_args)

    live_rec = build_handler({'live': LiveIndexSource(),
                              'mount': mounts_source})
    replay_rec = build_handler({'replay': rec_source})
    replay_coll = build_handler({'replay': coll_source})

    routes = (
        ('/live', live_rec),
        ('/replay', replay_rec),
        ('/replay-coll', replay_coll),
        # Patch tries the collection replay first, then live.
        ('/patch', HandlerSeq([replay_coll, live_rec])),
    )
    for path, handler in routes:
        app.add_route(path, handler)

    return app
def init():
    """Initialize debug logging, create a ``WebRecRecorder``, return its app.

    The recorder's ``msg_listen_loop`` is handed to ``spawn_once`` and the
    recorder is attached to the app as ``app.wr``.

    :return: ``wr.app`` of the newly created recorder.
    """
    init_logging(debug=True)

    recorder = WebRecRecorder(load_wr_config())

    # Hand the message listen loop to spawn_once.
    spawn_once(recorder.msg_listen_loop)

    recorder.init_app()

    # Make the recorder reachable from the app object.
    recorder.app.wr = recorder
    return recorder.app
def init():
    """Initialize logging, create a ``WebRecRecorder`` and return its app.

    The debug flag is read from the ``WR_DEBUG`` environment variable
    (default ``'True'``). Values that are valid Python literals (``True``,
    ``False``, ``0``, ``1``, ...) are evaluated with ``literal_eval`` and
    passed through unchanged. Anything else (for example ``true``, ``yes``
    or an empty string) previously raised at startup; it is now treated as
    enabled only when it spells one of ``1``, ``true``, ``yes`` or ``on``
    (case-insensitive).

    :return: ``wr.app`` of the newly created recorder.
    """
    raw_debug = os.environ.get('WR_DEBUG', 'True')
    try:
        debug = literal_eval(raw_debug)
    except (ValueError, SyntaxError):
        # Not a Python literal: fall back to common boolean spellings
        # instead of failing during startup.
        debug = raw_debug.strip().lower() in ('1', 'true', 'yes', 'on')

    init_logging(debug=debug)

    config = load_wr_config()
    wr = WebRecRecorder(config)

    # Hand the message listen loop to spawn_once.
    spawn_once(wr.msg_listen_loop)

    wr.init_app()

    # Make the recorder reachable from the app object.
    wr.app.wr = wr
    return wr.app
def init():
    """Initialize logging, create a ``WebRecRecorder`` and return its app.

    The recorder's ``msg_listen_loop`` is handed to ``spawn_once`` and the
    recorder is attached to the app as ``app.wr``.

    :return: ``wr.app`` of the newly created recorder.
    """
    init_logging()

    wr_config = load_wr_config()
    recorder = WebRecRecorder(wr_config)

    # Hand the message listen loop to spawn_once.
    spawn_once(recorder.msg_listen_loop)

    recorder.init_app()

    # Make the recorder reachable from the app object.
    recorder.app.wr = recorder
    return recorder.app
def run():
    """Run the temp checker forever, pausing between passes.

    The pause length in seconds is read from the ``TEMP_SLEEP_CHECK``
    environment variable (default 30). A failing pass is reported with a
    traceback and the loop continues.

    This function does not return. ``KeyboardInterrupt`` and ``SystemExit``
    are deliberately not caught, so the process can be stopped.
    """
    import traceback

    config = load_wr_config()
    temp_checker = TempChecker(config)

    sleep_secs = int(os.environ.get('TEMP_SLEEP_CHECK', 30))
    print('Running temp delete check every {0}'.format(sleep_secs))

    while True:
        try:
            temp_checker()
        except Exception:
            # Best-effort: report the failure and keep going. Catching only
            # Exception lets KeyboardInterrupt / SystemExit stop the loop.
            traceback.print_exc()

        # Sleep after failures too, so a persistent error cannot turn the
        # loop into a busy spin that floods the output with tracebacks.
        time.sleep(sleep_secs)
def __init__(self, redis_url=None):
    """Build the redis client and cork, then initialize the base manager.

    :param redis_url: redis URL to connect to; when falsy, the
        ``REDIS_BASE_URL`` environment variable is used instead.
    :raises KeyError: if no ``redis_url`` is given and ``REDIS_BASE_URL``
        is not set in the environment.
    """
    wr_config = load_wr_config()

    self.base_access = BaseAccess()

    # Init Redis, falling back to the environment when no url was given.
    redis_url = redis_url or os.environ['REDIS_BASE_URL']
    redis_client = redis.StrictRedis.from_url(redis_url,
                                              decode_responses=True)

    # Init Cork
    cork = WebRecCork.create_cork(redis_client, wr_config)

    super(CLIUserManager, self).__init__(redis=redis_client,
                                         cork=cork,
                                         config=wr_config)
def init_manager_for_cli():
    """Create a ``CLIRedisDataManager`` backed by ``REDIS_BASE_URL``.

    The manager also gets a ``fake_session`` attribute holding a
    ``Session`` built around the same cork instance.

    :return: the new ``CLIRedisDataManager``.
    :raises KeyError: if ``REDIS_BASE_URL`` is not set in the environment.
    """
    wr_config = load_wr_config()

    # Init Redis
    redis_client = redis.StrictRedis.from_url(os.environ['REDIS_BASE_URL'],
                                              decode_responses=True)

    # Init Cork
    cork = WebRecCork.create_cork(redis_client, wr_config)

    # Init Manager; the three middle arguments are passed as None.
    cli_manager = CLIRedisDataManager(redis_client, cork,
                                      None, None, None,
                                      wr_config)

    cli_manager.fake_session = Session(cork, {}, '', {'anon': True}, -1, False)

    return cli_manager
def __init__(self, redis_url=None):
    """Connect to redis, create the cork and initialize the parent class.

    :param redis_url: redis URL to connect to; when falsy, the value of
        the ``REDIS_BASE_URL`` environment variable is used.
    :raises KeyError: if no ``redis_url`` is given and ``REDIS_BASE_URL``
        is not set in the environment.
    """
    wr_config = load_wr_config()
    self.base_access = BaseAccess()

    # Init Redis
    target_url = redis_url if redis_url else os.environ['REDIS_BASE_URL']
    client = redis.StrictRedis.from_url(target_url, decode_responses=True)

    # Init Cork
    cork = WebRecCork.create_cork(client, wr_config)

    parent_args = dict(redis=client, cork=cork, config=wr_config)
    super(CLIUserManager, self).__init__(**parent_args)
def init():
    """Initialize logging, create a ``WebRecRecorder`` and return its app.

    When ``postfork`` is truthy, a function decorated with ``postfork`` is
    registered that spawns the recorder's ``msg_listen_loop`` only where
    ``uwsgi.mule_id()`` equals 0. Otherwise the listen loop is spawned
    immediately with ``gevent.spawn``.

    :return: ``wr.app`` of the newly created recorder.
    """
    init_logging()

    recorder = WebRecRecorder(load_wr_config())

    if not postfork:
        # No postfork decorator available: start the listen loop right away.
        gevent.spawn(recorder.msg_listen_loop)
    else:
        @postfork
        def listen_loop():
            # Only spawn the loop where the mule id is 0.
            if uwsgi.mule_id() != 0:
                return

            gevent.spawn(recorder.msg_listen_loop)

    recorder.init_app(None)

    # Make the recorder reachable from the app object.
    recorder.app.wr = recorder
    return recorder.app
def run():
    """Run the storage committer forever, pausing between passes.

    The pause length in seconds is read from the ``TEMP_SLEEP_CHECK``
    environment variable (default 30). After each successful pass and
    pause, an empty message is published on the ``close_idle`` channel of
    the committer's redis connection. A failing pass is reported with a
    traceback and the loop continues.

    This function does not return. ``KeyboardInterrupt`` and ``SystemExit``
    are deliberately not caught, so the process can be stopped.
    """
    import traceback

    sleep_secs = int(os.environ.get('TEMP_SLEEP_CHECK', 30))
    print('Running storage committer {0}'.format(sleep_secs))

    from webrecorder.rec.s3 import S3Storage

    config = load_wr_config()
    storage_committer = StorageCommitter(config)
    storage_committer.add_storage_class('s3', S3Storage)

    while True:
        try:
            storage_committer()
            time.sleep(sleep_secs)
            storage_committer.redis.publish('close_idle', '')
        except Exception:
            # Best-effort: report the failure and keep going. Catching only
            # Exception lets KeyboardInterrupt / SystemExit stop the loop.
            traceback.print_exc()

            # Back off before retrying so a persistent error cannot turn
            # the loop into a busy spin.
            time.sleep(sleep_secs)
def make_webagg():
    """Assemble the ``ResAggApp`` with its live, replay and patch routes.

    Redis URLs are formed from the ``REDIS_BASE_URL`` environment variable
    plus key templates from the loaded config. The optional
    ``CACHE_PROXY_URL`` value is stored on
    ``AitFilterIndexSource.PROXY_PREFIX`` and passed to every resource
    handler.

    :return: the configured ``ResAggApp`` instance.
    :raises KeyError: if ``REDIS_BASE_URL`` is not set in the environment.
    """
    config = load_wr_config()
    app = ResAggApp(debug=True)

    redis_base = os.environ['REDIS_BASE_URL'] + '/'
    rec_url = redis_base + config['cdxj_key_templ']
    coll_url = redis_base + config['cdxj_coll_key_templ']
    warc_url = redis_base + config['warc_key_templ']

    cache_proxy_url = os.environ.get('CACHE_PROXY_URL')
    AitFilterIndexSource.PROXY_PREFIX = cache_proxy_url

    def build_handler(sources):
        # Every handler shares the same WARC url and cache proxy url.
        return DefaultResourceHandler(SimpleAggregator(sources),
                                      warc_url,
                                      cache_proxy_url)

    rec_source = RedisIndexSource(rec_url)

    # Reuse the redis client created by the recording source.
    shared_redis = rec_source.redis

    live_rec = build_handler({'live': LiveIndexSource()})
    replay_rec = build_handler({'replay': rec_source})

    coll_source = MountMultiKeyIndexSource(timeout=20.0,
                                           redis_url=coll_url,
                                           redis=shared_redis)
    replay_coll = build_handler({'replay': coll_source})

    routes = (
        ('/live', live_rec),
        ('/replay', replay_rec),
        ('/replay-coll', replay_coll),
        # Patch tries the collection replay first, then live.
        ('/patch', HandlerSeq([replay_coll, live_rec])),
    )
    for path, handler in routes:
        app.add_route(path, handler)

    return app
def __init__(self):
    """Build the ``BaseWarcServer`` app and register its routes.

    Redis key URLs are formed from the ``REDIS_BASE_URL`` environment
    variable plus key templates taken from the ``Recording`` and
    ``Collection`` classes. The ``CACHE_PROXY_URL`` environment value
    (default ``''``) is stored in the module-level ``PROXY_PREFIX`` and
    passed to every resource handler.

    Routes registered: ``/live``, ``/extract``, ``/replay``,
    ``/replay-coll`` and ``/patch``. The finished app is stored as
    ``self.app``.

    :raises KeyError: if ``REDIS_BASE_URL`` is not set in the environment.
    """
    global PROXY_PREFIX

    init_logging()

    config = load_wr_config()

    app = BaseWarcServer(debug=True)

    redis_base = os.environ['REDIS_BASE_URL'] + '/'

    # Key templates come from the model classes rather than the config.
    rec_url = redis_base + Recording.CDXJ_KEY
    coll_url = redis_base + Collection.COLL_CDXJ_KEY
    warc_url = redis_base + Recording.COLL_WARC_KEY
    rec_map_key = Collection.RECS_KEY

    redis_resolver = RedisResolver(redis_url=warc_url,
                                   member_key_templ=rec_map_key)

    # The index sources below reuse the resolver's redis client.
    shared_redis = redis_resolver.redis
    warc_resolvers = [redis_resolver]

    cache_proxy_url = os.environ.get('CACHE_PROXY_URL', '')
    PROXY_PREFIX = cache_proxy_url

    timeout = 20.0

    def build_handler(aggregator):
        # Every handler shares the same resolvers and cache proxy url.
        return DefaultResourceHandler(aggregator,
                                      warc_resolvers,
                                      cache_proxy_url)

    rec_source = RedisIndexSource(timeout=timeout,
                                  redis_url=rec_url,
                                  redis=shared_redis)

    coll_source = RedisIndexSource(timeout=timeout,
                                   redis_url=coll_url,
                                   redis=shared_redis)

    live_rec = build_handler(SimpleAggregator({'live': LiveIndexSource()}))

    # Extractable archives (all available)
    wam_loader = WAMSourceLoader(memento_cls=ProxyMementoIndexSource,
                                 remote_cls=ProxyRemoteIndexSource,
                                 wb_memento_cls=ProxyWBMementoIndexSource)
    extractable_archives = wam_loader.sources

    # Extract Source
    extract_primary = build_handler(
        GeventTimeoutAggregator(extractable_archives, timeout=timeout))

    # Patch fallback archives + live. The mapping returned by
    # filter_archives is extended in place (no copy is made).
    patch_archives = self.filter_archives(extractable_archives,
                                          config['patch_archives_index'])
    patch_archives['live'] = LiveIndexSource()

    extract_other = build_handler(
        GeventTimeoutAggregator(patch_archives,
                                timeout=timeout,
                                sources_key='inv_sources',
                                invert_sources=True))

    patch_rec = build_handler(
        GeventTimeoutAggregator(patch_archives, timeout=timeout))

    # Single Rec Replay
    replay_rec = build_handler(SimpleAggregator({'local': rec_source}))

    # Coll Replay
    replay_coll = build_handler(SimpleAggregator({'local': coll_source}))

    routes = (
        ('/live', live_rec),
        ('/extract', HandlerSeq([extract_primary,
                                 extract_other,
                                 replay_rec])),
        ('/replay', replay_rec),
        ('/replay-coll', replay_coll),
        ('/patch', HandlerSeq([replay_coll, patch_rec])),
    )
    for path, handler in routes:
        app.add_route(path, handler)

    self.app = app
def __init__(self, redis_url=None):
    """Wire up the bottle app, redis clients, managers and controllers.

    :param redis_url: URL for the main redis client; when falsy, the
        ``REDIS_BASE_URL`` environment variable is used.
    :raises KeyError: if a required redis environment variable
        (``REDIS_BASE_URL`` when no url is given, ``REDIS_BROWSER_URL``,
        ``REDIS_SESSION_URL``) is not set.
    """
    self._init_logging()

    # Frozen builds resolve static files relative to sys._MEIPASS.
    is_frozen = getattr(sys, 'frozen', False)
    if not is_frozen:
        self.static_root = resource_filename('webrecorder', 'static/')
    else:
        self.static_root = os.path.join(sys._MEIPASS,
                                        'webrecorder',
                                        'static/')

    bottle_app = Bottle()
    self.bottle_app = bottle_app

    # JSON encoding for datetime objects
    def dump_json(value):
        return json.dumps(value, cls=CustomJSONEncoder)

    self.bottle_app.install(JSONPlugin(json_dumps=dump_json))

    config = load_wr_config()

    # Init Redis
    redis_url = redis_url or os.environ['REDIS_BASE_URL']

    self.redis = redis.StrictRedis.from_url(redis_url)
    self.browser_redis = redis.StrictRedis.from_url(
        os.environ['REDIS_BROWSER_URL'],
        decode_responses=True)
    self.session_redis = redis.StrictRedis.from_url(
        os.environ['REDIS_SESSION_URL'])

    # Init Jinja
    jinja_env = self.init_jinja_env(config)

    # Init Content Loader/Rewriter
    content_app = ContentController(app=bottle_app,
                                    jinja_env=jinja_env,
                                    config=config,
                                    redis=self.redis)

    # Init Browser Mgr
    self.browser_mgr = BrowserManager(config,
                                      self.browser_redis,
                                      content_app)

    # Init Cork
    self.cork = WebRecCork.create_cork(self.redis, config)

    # Init Manager
    manager = RedisDataManager(self.redis,
                               self.cork,
                               content_app,
                               self.browser_redis,
                               self.browser_mgr,
                               config)

    # Init Session temp_prefix
    Session.temp_prefix = config['temp_prefix']

    # Init Core app controllers; the instances are not used further here.
    controller_args = dict(app=bottle_app,
                           jinja_env=jinja_env,
                           manager=manager,
                           config=config)
    for controller_cls in self.ALL_CONTROLLERS:
        controller = controller_cls(**controller_args)

    # Set Error Handler, wrapping bottle's existing default handler.
    previous_handler = bottle_app.default_error_handler
    bottle_app.default_error_handler = self.make_err_handler(previous_handler)

    final_app = RedisSessionMiddleware(bottle_app,
                                       self.cork,
                                       self.session_redis,
                                       config)

    super(AppController, self).__init__(final_app, jinja_env, manager, config)
def __init__(self):
    """Build the ``BaseWarcServer`` app and register its routes.

    Redis key URLs are formed from the ``REDIS_BASE_URL`` environment
    variable plus key templates from the loaded config. The
    ``CACHE_PROXY_URL`` environment value (default ``''``) is stored in
    the module-level ``PROXY_PREFIX`` and passed to every resource
    handler.

    Routes registered: ``/live``, ``/extract``, ``/replay``,
    ``/replay-coll`` and ``/patch``. The finished app is stored as
    ``self.app``.

    :raises KeyError: if ``REDIS_BASE_URL`` is not set in the environment.
    """
    global PROXY_PREFIX

    init_logging()

    config = load_wr_config()

    app = BaseWarcServer(debug=True)

    redis_base = os.environ['REDIS_BASE_URL'] + '/'

    rec_url = redis_base + config['cdxj_key_templ']
    coll_url = redis_base + config['coll_cdxj_key_templ']
    warc_url = redis_base + config['warc_key_templ']
    rec_list_key = config['rec_list_key_templ']

    redis_resolver = RedisResolver(redis_url=warc_url,
                                   member_key_templ=rec_list_key)

    # The index sources below reuse the resolver's redis client.
    shared_redis = redis_resolver.redis
    warc_resolvers = [redis_resolver]

    cache_proxy_url = os.environ.get('CACHE_PROXY_URL', '')
    PROXY_PREFIX = cache_proxy_url

    timeout = 20.0

    def build_handler(aggregator):
        # Every handler shares the same resolvers and cache proxy url.
        return DefaultResourceHandler(aggregator,
                                      warc_resolvers,
                                      cache_proxy_url)

    rec_source = RedisIndexSource(timeout=timeout,
                                  redis_url=rec_url,
                                  redis=shared_redis)

    coll_source = RedisIndexSource(timeout=timeout,
                                   redis_url=coll_url,
                                   redis=shared_redis)

    live_rec = build_handler(SimpleAggregator({'live': LiveIndexSource()}))

    # Extractable archives (all available)
    wam_loader = WAMSourceLoader(memento_cls=ProxyMementoIndexSource,
                                 remote_cls=ProxyRemoteIndexSource,
                                 wb_memento_cls=ProxyWBMementoIndexSource)
    extractable_archives = wam_loader.sources

    # Extract Source
    extract_primary = build_handler(
        GeventTimeoutAggregator(extractable_archives, timeout=timeout))

    # Patch fallback archives + live. The mapping returned by
    # filter_archives is extended in place (no copy is made).
    patch_archives = self.filter_archives(extractable_archives,
                                          config['patch_archives_index'])
    patch_archives['live'] = LiveIndexSource()

    extract_other = build_handler(
        GeventTimeoutAggregator(patch_archives,
                                timeout=timeout,
                                sources_key='inv_sources',
                                invert_sources=True))

    patch_rec = build_handler(
        GeventTimeoutAggregator(patch_archives, timeout=timeout))

    # Single Rec Replay
    replay_rec = build_handler(SimpleAggregator({'local': rec_source}))

    # Coll Replay
    replay_coll = build_handler(SimpleAggregator({'local': coll_source}))

    routes = (
        ('/live', live_rec),
        ('/extract', HandlerSeq([extract_primary,
                                 extract_other,
                                 replay_rec])),
        ('/replay', replay_rec),
        ('/replay-coll', replay_coll),
        ('/patch', HandlerSeq([replay_coll, patch_rec])),
    )
    for path, handler in routes:
        app.add_route(path, handler)

    self.app = app
def __init__(self, redis_url=None):
    """Wire up the bottle app, redis clients, managers and controllers.

    :param redis_url: URL for the main redis client; when falsy, the
        ``REDIS_BASE_URL`` environment variable is used.
    :raises KeyError: if a required redis environment variable
        (``REDIS_BASE_URL`` when no url is given, ``REDIS_BROWSER_URL``,
        ``REDIS_SESSION_URL``) is not set.
    """
    self._init_logging()

    # Frozen builds resolve static files relative to sys._MEIPASS.
    is_frozen = getattr(sys, 'frozen', False)
    if not is_frozen:
        self.static_root = resource_filename('webrecorder', 'static/')
    else:
        self.static_root = os.path.join(sys._MEIPASS,
                                        'webrecorder',
                                        'static/')

    # only launch if running in place, not from installed package
    running_in_place = '.egg' not in __file__
    if running_in_place:
        spawn_once(default_build, worker=1, force_build=False)

    BaseRequest.MEMFILE_MAX = 500000  # 500kb

    bottle_app = APIBottle()
    self.bottle_app = bottle_app

    config = load_wr_config()

    # Init Redis
    redis_url = redis_url or os.environ['REDIS_BASE_URL']

    self.redis = redis.StrictRedis.from_url(redis_url,
                                            decode_responses=True)
    browser_redis = redis.StrictRedis.from_url(
        os.environ['REDIS_BROWSER_URL'],
        decode_responses=True)
    session_redis = redis.StrictRedis.from_url(
        os.environ['REDIS_SESSION_URL'])

    self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')

    # Init Jinja
    jinja_env = self.init_jinja_env(config)

    # Init Cork
    cork = WebRecCork.create_cork(self.redis, config)

    # User Manager
    user_manager = UserManager(redis=self.redis, cork=cork, config=config)

    # Init Browser Mgr
    browser_mgr = BrowserManager(config, browser_redis, user_manager)

    # Init Dat Share
    DatShare.dat_share = DatShare(self.redis)

    # Init Content Loader/Rewriter
    content_app = ContentController(app=bottle_app,
                                    jinja_env=jinja_env,
                                    user_manager=user_manager,
                                    config=config,
                                    browser_mgr=browser_mgr,
                                    redis=self.redis,
                                    cork=cork)

    # Init Session temp_prefix
    Session.temp_prefix = config['temp_prefix']

    # Shared keyword arguments for every controller and the parent class.
    kwargs = {
        'app': bottle_app,
        'jinja_env': jinja_env,
        'user_manager': user_manager,
        'browser_mgr': browser_mgr,
        'content_app': content_app,
        'cork': cork,
        'redis': self.redis,
        'session_redis': session_redis,
        'config': config,
    }

    # Init Core app controllers; the instances are not used further here.
    for controller_cls in self.ALL_CONTROLLERS:
        controller = controller_cls(**kwargs)

    # Set Error Handler, wrapping bottle's existing default handler.
    previous_handler = bottle_app.default_error_handler
    bottle_app.default_error_handler = self.make_err_handler(previous_handler)

    final_app = RedisSessionMiddleware(bottle_app,
                                       cork,
                                       session_redis,
                                       config,
                                       access_cls=SessionAccessCache,
                                       access_redis=self.redis)

    final_app = WSGIProxMiddleware(final_app,
                                   '/_proxy/',
                                   proxy_host='webrecorder.proxy',
                                   proxy_options=self._get_proxy_options())

    # The parent class receives the fully wrapped app, not the bottle app.
    kwargs['app'] = final_app

    super(MainController, self).__init__(**kwargs)

    self.browser_mgr = browser_mgr
    self.content_app = content_app

    wr_api_spec.build_api_spec()