def setup_class(cls):
    """Prepare the CDXJ cache test class.

    Runs the parent ``setup_class`` with the
    ``test_cdxj_cache_config.yaml`` extra config file, hands a
    ``Worker(StorageCommitter)`` to ``gevent.spawn`` and stores a CLI
    manager on ``cls.m``.
    """
    parent = super(TestCDXJCache, cls)
    parent.setup_class(extra_config_file='test_cdxj_cache_config.yaml')

    # Keep the worker on the class so it stays reachable after setup.
    committer = Worker(StorageCommitter)
    cls.worker = committer
    gevent.spawn(committer.run)

    cls.m = init_manager_for_cli()
def setup_class(cls, **kwargs):
    """Prepare the upload test class.

    Sets ``AUTO_LOGIN_USER`` in the process environment *before* the
    parent ``setup_class`` runs, then creates a CLI manager, resets
    ``cls.warc`` and hands a ``Worker(TempChecker)`` to ``gevent.spawn``.

    :param kwargs: forwarded unchanged to the parent ``setup_class``.
    """
    os.environ['AUTO_LOGIN_USER'] = '******'

    parent = super(TestUpload, cls)
    parent.setup_class(**kwargs)

    cls.manager = init_manager_for_cli()
    cls.warc = None

    # Keep the worker on the class so it stays reachable after setup.
    checker = Worker(TempChecker)
    cls.worker = checker
    gevent.spawn(checker.run)
def __init__(self, app_port=8090, rec_port=0, warc_port=0,
             env_params=None, run_tempchecker=False,
             run_storagecommitter=False):
    """Start the warc server, recorder and app servers plus optional workers.

    :param app_port: port passed to ``init_server`` for the main app.
    :param rec_port: port passed to ``init_server`` for the recorder.
    :param warc_port: port passed to ``init_server`` for the warc server.
    :param env_params: optional mapping merged into ``os.environ`` first.
    :param run_tempchecker: when true, run a ``Worker(TempChecker)``.
    :param run_storagecommitter: when true, run a
        ``Worker(StorageCommitter)``.
    """
    if env_params:
        os.environ.update(env_params)

    # The factories import lazily so each application module is only
    # loaded when (and if) its server is actually created.
    def warcserver():
        from webrecorder.load.main import WRWarcServer
        return WRWarcServer().app

    def recorder():
        from webrecorder.rec.main import init as record_init
        return record_init()

    def app():
        from webrecorder.maincontroller import MainController
        return MainController().app

    # Same creation order as before: warc server, recorder, then app.
    server_specs = (
        ('warc_serv', warc_port, warcserver, 'WARCSERVER_HOST'),
        ('rec_serv', rec_port, recorder, 'RECORD_HOST'),
        ('app_serv', app_port, app, 'APP_HOST'),
    )
    for attr_name, port, factory, env_name in server_specs:
        setattr(self, attr_name, self.init_server(port, factory, env_name))

    if run_storagecommitter:
        self.storage_worker = Worker(StorageCommitter)
    else:
        self.storage_worker = None
    if self.storage_worker:
        gevent.spawn(self.storage_worker.run)

    if run_tempchecker:
        self.temp_worker = Worker(TempChecker)
    else:
        self.temp_worker = None
    if self.temp_worker:
        gevent.spawn(self.temp_worker.run)
def setup_class(cls, **kwargs):
    """Prepare the temp-content test class.

    After the parent ``setup_class`` runs, replaces
    ``RedisSessionMiddleware.make_id`` with a stub that always yields the
    session id ``'sesh_id'``, resets ``cls.dyn_stats`` and hands a
    ``Worker(TempChecker)`` to ``gevent.spawn``.

    :param kwargs: forwarded unchanged to the parent ``setup_class``.
    """
    super(TestTempContent, cls).setup_class(**kwargs)

    def make_id(self):
        # Fixed id; the redis key is derived from the middleware's own
        # key template.
        fixed_id = 'sesh_id'
        return fixed_id, self.key_template.format(fixed_id)

    session_patch = patch(
        'webrecorder.session.RedisSessionMiddleware.make_id', make_id)
    # Stored on the class so the patch object remains accessible later.
    cls.seshmock = session_patch
    session_patch.start()

    cls.dyn_stats = []

    temp_checker = Worker(TempChecker)
    cls.worker = temp_checker
    gevent.spawn(temp_checker.run)
# NOTE(review): the statements below are the tail of a method whose ``def``
# line is outside this view; layout is reconstructed, tokens are unchanged.
        # Name of the storage profile recorded in the hash at ``info_key``.
        storage_type = self.redis.hget(info_key, 'storage_type')

        config = None

        # attempt to find storage profile by name
        if storage_type:
            config = self.redis.hgetall(
                self.storage_key_templ.format(name=storage_type))

        # default storage profile
        # (also used when the named profile lookup came back empty)
        if not config:
            config = self.default_storage_profile

        # storage profile class stored in profile 'type'
        storage_class = self.storage_class_map.get(config['type'])

        # keeping local storage only
        # (no class registered for this type -> caller receives None)
        if not storage_class:
            return None

        # Instantiate the registered class with the resolved profile.
        return storage_class(config)

    def add_storage_class(self, type_, cls):
        """Register ``cls`` in ``self.storage_class_map`` under ``type_``.

        An existing entry for the same ``type_`` is overwritten.
        """
        self.storage_class_map[type_] = cls


# =============================================================================
# Script entry point: wrap StorageCommitter in a Worker and call its run().
if __name__ == "__main__":
    from webrecorder.rec.worker import Worker
    Worker(StorageCommitter).run()
def run():
    """Build a ``Worker`` around ``StorageCommitter`` and call its ``run()``."""
    # Function-scope import: the worker module is loaded only when run()
    # is actually invoked.
    from webrecorder.rec.worker import Worker

    committer_worker = Worker(StorageCommitter)
    committer_worker.run()
# NOTE(review): the statements below are the tail of a method whose ``def``
# line (and the header of the first loop) are outside this view; layout is
# reconstructed, tokens are unchanged.
            # Directories without the temp prefix are not deletion
            # candidates here; they are handed to remove_empty_user_dir().
            if not dir_name.startswith(self.temp_prefix):
                self.remove_empty_user_dir(warc_dir)
                continue

            # not yet removed, need to delete contents
            # (user name is the last path component of warc_dir)
            temp_user = warc_dir.rsplit(os.path.sep, 1)[1]

            temps_to_remove.add((temp_user, warc_dir))

        # Glob-style pattern for the info keys of temp-prefixed users.
        temp_match = User.INFO_KEY.format(user=self.temp_prefix + '*')

        #print('Temp Key Check')

        for redis_key in self.data_redis.scan_iter(match=temp_match, count=100):
            # presumably the key looks like '<x>:<user>:<y>'; verify against
            # User.INFO_KEY
            temp_user = redis_key.rsplit(':', 2)[1]

            # NOTE(review): every add() visible here stores a
            # (temp_user, dir) tuple, so this membership test on the bare
            # user string looks like it is always true -- confirm against the
            # part of the method that is out of view.
            if temp_user not in temps_to_remove:
                temps_to_remove.add(
                    (temp_user, os.path.join(self.record_root_dir, temp_user)))

        # remove if expired
        for temp_user, temp_dir in temps_to_remove:
            self.delete_if_expired(temp_user, temp_dir)


# =============================================================================
# Script entry point: wrap TempChecker in a Worker and call its run().
if __name__ == "__main__":
    from webrecorder.rec.worker import Worker
    Worker(TempChecker).run()
class FullStackRunner(object):
    """Run the warc server, recorder and main app in one process.

    Each server is a ``GeventServer`` created by ``init_server``; a negative
    port disables that server (its attribute is then ``None``).  Optional
    ``TempChecker`` / ``StorageCommitter`` workers are started through
    ``gevent.spawn``.  Call ``close()`` to stop everything.
    """

    def __init__(self, app_port=8090, rec_port=0, warc_port=0,
                 env_params=None, run_tempchecker=False,
                 run_storagecommitter=False):
        """Create the servers and, if requested, the background workers.

        :param app_port: port for the main app server.
        :param rec_port: port for the recorder server.
        :param warc_port: port for the warc server.
        :param env_params: optional mapping merged into ``os.environ``
            before anything else is created.
        :param run_tempchecker: when true, run a ``Worker(TempChecker)``.
        :param run_storagecommitter: when true, run a
            ``Worker(StorageCommitter)``.
        """
        if env_params:
            os.environ.update(env_params)

        # The factories import lazily so an application module is only
        # loaded when its server is actually created.
        def warcserver():
            from webrecorder.load.main import WRWarcServer
            return WRWarcServer().app

        def recorder():
            from webrecorder.rec.main import init as record_init
            return record_init()

        def app():
            from webrecorder.maincontroller import MainController
            return MainController().app

        # init_server() publishes each server's URL in os.environ before the
        # next application is built, so the creation order is preserved.
        self.warc_serv = self.init_server(
            warc_port, warcserver, 'WARCSERVER_HOST')
        self.rec_serv = self.init_server(rec_port, recorder, 'RECORD_HOST')
        self.app_serv = self.init_server(app_port, app, 'APP_HOST')

        if run_storagecommitter:
            self.storage_worker = Worker(StorageCommitter)
        else:
            self.storage_worker = None
        if self.storage_worker:
            gevent.spawn(self.storage_worker.run)

        if run_tempchecker:
            self.temp_worker = Worker(TempChecker)
        else:
            self.temp_worker = None
        if self.temp_worker:
            gevent.spawn(self.temp_worker.run)

    def close(self):
        """Stop the workers, then the servers, then close the recorder writer.

        Errors raised while closing the writer are printed, not propagated.
        """
        # Workers first: temp checker, then storage committer.
        for worker_attr in ('temp_worker', 'storage_worker'):
            worker = getattr(self, worker_attr)
            if worker:
                worker.stop()
                setattr(self, worker_attr, None)

        for server in (self.warc_serv, self.rec_serv, self.app_serv):
            self.stop_server(server)

        # try closing writer
        try:
            if self.rec_serv:
                self.rec_serv.server.application.wr.close()
        except Exception:
            # Best effort only: report the failure and carry on.
            traceback.print_exc()

    def init_server(self, port, func, env_var_name=None):
        """Create a ``GeventServer`` for the application returned by ``func``.

        :param port: port to listen on; a negative value means "do not
            start", in which case ``func`` is never called.
        :param func: zero-argument callable returning the application.
        :param env_var_name: if given, ``os.environ[env_var_name]`` is set to
            the server's ``http://localhost:<port>`` URL, using the port
            read back from the server, and the assignment is printed.
        :returns: the ``GeventServer``, or ``None`` for a negative port.
        """
        if port < 0:
            return None

        server = GeventServer(func(), port, handler_class=ws_handler_class)

        if env_var_name:
            url = 'http://localhost:{0}'.format(server.port)
            os.environ[env_var_name] = url
            print(env_var_name + '=' + os.environ[env_var_name], flush=True)

        return server

    def stop_server(self, serv):
        """Call ``serv.stop()`` unless ``serv`` is falsy (server disabled)."""
        if not serv:
            return
        serv.stop()
# NOTE(review): the statements below are the tail of a method whose ``def``
# line is outside this view; layout is reconstructed, tokens are unchanged.
        # Attach the derivatives recording to ``recording`` and keep its id
        # for the crawl's user params.
        recording.set_derivs_recording(derivs_recording)

        derivs_rec = derivs_recording.my_id

        # Work on a copy so the shared SEARCH_CRAWL_DEF template is not
        # rebound key-by-key (only top-level keys are assigned below).
        crawl_def = SEARCH_CRAWL_DEF.copy()
        # All three collection fields point at the same collection.
        crawl_def['coll'] = crawl_def['screenshot_coll'] = crawl_def[
            'text_coll'] = data['coll']
        crawl_def['user_params'] = {
            'user': data['user'],
            'coll': data['coll'],
            'coll_name': data['coll_name'],
            'rec': derivs_rec,
            'type': 'replay-coll',
            # updated later
            'request_ts': '',
            'browser': BROWSER
        }
        crawl_def['name'] = 'text-' + data['user'] + '-' + data['coll']
        crawl_def['seed_urls'] = data['pages']

        print(crawl_def)

        # Submit the crawl definition as JSON to the configured endpoint.
        # NOTE(review): no timeout is passed to requests.post here.
        r = requests.post(self.browsertrix_url, json=crawl_def)

        print(r.text)


# =============================================================================
# Script entry point: wrap SearchAutomation in a Worker and call its run().
# NOTE(review): the meaning of the second argument (120) is defined by
# Worker, which is not visible here -- presumably an interval; confirm.
if __name__ == "__main__":
    from webrecorder.rec.worker import Worker
    Worker(SearchAutomation, 120).run()