def setup_class(cls):
        """Set up the CDXJ cache test class.

        Delegates to the parent ``setup_class`` with the CDXJ cache test
        config file, starts a ``StorageCommitter`` worker via
        ``gevent.spawn`` and stores the result of ``init_manager_for_cli()``
        on the class as ``cls.m``.
        """
        config_file = 'test_cdxj_cache_config.yaml'
        super(TestCDXJCache, cls).setup_class(extra_config_file=config_file)

        # the worker is kept on the class and its run loop is spawned
        committer = Worker(StorageCommitter)
        cls.worker = committer
        gevent.spawn(committer.run)

        cls.m = init_manager_for_cli()
Example #2
0
    def setup_class(cls, **kwargs):
        """Set up the upload test class.

        Sets ``AUTO_LOGIN_USER`` in the environment, runs the parent
        ``setup_class`` with the given keyword arguments, then stores a CLI
        manager, an empty ``warc`` slot and a spawned ``TempChecker`` worker
        on the class.
        """
        # environment is updated first, ahead of the parent setup call
        os.environ['AUTO_LOGIN_USER'] = '******'
        super(TestUpload, cls).setup_class(**kwargs)

        cls.manager = init_manager_for_cli()
        cls.warc = None

        temp_checker = Worker(TempChecker)
        cls.worker = temp_checker
        gevent.spawn(temp_checker.run)
Example #3
0
    def __init__(self,
                 app_port=8090,
                 rec_port=0,
                 warc_port=0,
                 env_params=None,
                 run_tempchecker=False,
                 run_storagecommitter=False):
        """Start the warcserver, recorder and app servers plus optional workers.

        :param app_port: port handed to ``init_server`` for the main app
        :param rec_port: port handed to ``init_server`` for the recorder
        :param warc_port: port handed to ``init_server`` for the warcserver
        :param env_params: optional mapping merged into ``os.environ`` before
            anything is started
        :param run_tempchecker: when truthy, spawn a ``TempChecker`` worker
        :param run_storagecommitter: when truthy, spawn a
            ``StorageCommitter`` worker
        """
        if env_params:
            os.environ.update(env_params)

        # Each factory imports its module only when called, i.e. after the
        # environment has been updated above.
        def build_warcserver():
            from webrecorder.load.main import WRWarcServer
            return WRWarcServer().app

        def build_recorder():
            from webrecorder.rec.main import init as record_init
            return record_init()

        def build_app():
            from webrecorder.maincontroller import MainController
            return MainController().app

        # servers are created in this order: warcserver, recorder, app
        self.warc_serv = self.init_server(
            warc_port, build_warcserver, 'WARCSERVER_HOST')
        self.rec_serv = self.init_server(
            rec_port, build_recorder, 'RECORD_HOST')
        self.app_serv = self.init_server(
            app_port, build_app, 'APP_HOST')

        if run_storagecommitter:
            self.storage_worker = Worker(StorageCommitter)
        else:
            self.storage_worker = None
        if self.storage_worker:
            gevent.spawn(self.storage_worker.run)

        if run_tempchecker:
            self.temp_worker = Worker(TempChecker)
        else:
            self.temp_worker = None
        if self.temp_worker:
            gevent.spawn(self.temp_worker.run)
Example #4
0
    def setup_class(cls, **kwargs):
        """Set up the temp-content test class.

        After the parent setup, ``RedisSessionMiddleware.make_id`` is patched
        with a stub that always yields the id ``'sesh_id'``, an empty
        ``dyn_stats`` list is stored on the class and a ``TempChecker``
        worker is spawned.
        """
        super(TestTempContent, cls).setup_class(**kwargs)

        def make_id(self):
            # fixed session id; the redis key is derived from the
            # middleware's own key template
            fixed_id = 'sesh_id'
            return fixed_id, self.key_template.format(fixed_id)

        patch_target = 'webrecorder.session.RedisSessionMiddleware.make_id'
        cls.seshmock = patch(patch_target, make_id)
        cls.seshmock.start()

        cls.dyn_stats = []

        temp_checker = Worker(TempChecker)
        cls.worker = temp_checker
        gevent.spawn(temp_checker.run)
        storage_type = self.redis.hget(info_key, 'storage_type')

        config = None

        # attempt to find storage profile by name
        if storage_type:
            config = self.redis.hgetall(
                self.storage_key_templ.format(name=storage_type))

        # default storage profile
        if not config:
            config = self.default_storage_profile

        # storage profile class stored in profile 'type'
        storage_class = self.storage_class_map.get(config['type'])

        # keeping local storage only
        if not storage_class:
            return None

        return storage_class(config)

    def add_storage_class(self, type_, cls):
        """Register ``cls`` in ``storage_class_map`` under the key ``type_``.

        An existing entry for the same key is overwritten.
        """
        registry = self.storage_class_map
        registry[type_] = cls


# =============================================================================
if __name__ == "__main__":
    # Script entry point: build a Worker around StorageCommitter and run it.
    from webrecorder.rec.worker import Worker
    worker = Worker(StorageCommitter)
    worker.run()
Example #6
0
def run():
    """Build a ``Worker`` around ``StorageCommitter`` and call its ``run()``."""
    # imported at call time, not at module import time
    from webrecorder.rec.worker import Worker
    committer_worker = Worker(StorageCommitter)
    committer_worker.run()
Example #7
0
            if not dir_name.startswith(self.temp_prefix):
                self.remove_empty_user_dir(warc_dir)
                continue

            # not yet removed, need to delete contents
            temp_user = warc_dir.rsplit(os.path.sep, 1)[1]

            temps_to_remove.add((temp_user, warc_dir))

        temp_match = User.INFO_KEY.format(user=self.temp_prefix + '*')

        #print('Temp Key Check')

        for redis_key in self.data_redis.scan_iter(match=temp_match,
                                                   count=100):
            temp_user = redis_key.rsplit(':', 2)[1]

            if temp_user not in temps_to_remove:
                temps_to_remove.add(
                    (temp_user, os.path.join(self.record_root_dir, temp_user)))

        # remove if expired
        for temp_user, temp_dir in temps_to_remove:
            self.delete_if_expired(temp_user, temp_dir)


# =============================================================================
if __name__ == "__main__":
    # Script entry point: build a Worker around TempChecker and run it.
    from webrecorder.rec.worker import Worker
    worker = Worker(TempChecker)
    worker.run()
Example #8
0
class FullStackRunner(object):
    """Run the warcserver, recorder and app servers, plus optional workers.

    Each server is created through ``init_server``; a negative port skips
    that server and leaves the corresponding attribute as ``None``.
    """

    def __init__(self,
                 app_port=8090,
                 rec_port=0,
                 warc_port=0,
                 env_params=None,
                 run_tempchecker=False,
                 run_storagecommitter=False):
        """Start the requested servers and workers.

        :param app_port: port for the main app (negative: not started)
        :param rec_port: port for the recorder (negative: not started)
        :param warc_port: port for the warcserver (negative: not started)
        :param env_params: optional mapping merged into ``os.environ`` before
            anything is started
        :param run_tempchecker: when truthy, spawn a ``TempChecker`` worker
        :param run_storagecommitter: when truthy, spawn a
            ``StorageCommitter`` worker
        """
        if env_params:
            os.environ.update(env_params)

        # Each factory imports its module only when called, i.e. after the
        # environment has been updated above and only if its server is
        # actually started.
        def build_warcserver():
            from webrecorder.load.main import WRWarcServer
            return WRWarcServer().app

        def build_recorder():
            from webrecorder.rec.main import init as record_init
            return record_init()

        def build_app():
            from webrecorder.maincontroller import MainController
            return MainController().app

        # servers are created in this order: warcserver, recorder, app
        self.warc_serv = self.init_server(
            warc_port, build_warcserver, 'WARCSERVER_HOST')
        self.rec_serv = self.init_server(
            rec_port, build_recorder, 'RECORD_HOST')
        self.app_serv = self.init_server(
            app_port, build_app, 'APP_HOST')

        if run_storagecommitter:
            self.storage_worker = Worker(StorageCommitter)
        else:
            self.storage_worker = None
        if self.storage_worker:
            gevent.spawn(self.storage_worker.run)

        if run_tempchecker:
            self.temp_worker = Worker(TempChecker)
        else:
            self.temp_worker = None
        if self.temp_worker:
            gevent.spawn(self.temp_worker.run)

    def close(self):
        """Stop the workers and servers, then try to close the recorder writer.

        Workers are stopped first and their attributes reset to ``None``.
        Any exception raised while closing the writer is printed, not raised.
        """
        if self.temp_worker:
            self.temp_worker.stop()
            self.temp_worker = None

        if self.storage_worker:
            self.storage_worker.stop()
            self.storage_worker = None

        for server in (self.warc_serv, self.rec_serv, self.app_serv):
            self.stop_server(server)

        # try closing writer
        try:
            if self.rec_serv:
                self.rec_serv.server.application.wr.close()
        except Exception:
            traceback.print_exc()

    def init_server(self, port, func, env_var_name=None):
        """Create a ``GeventServer`` for the application returned by ``func``.

        :param port: port to serve on; a negative value means "do not start"
        :param func: zero-argument callable returning the application
        :param env_var_name: if given, this environment variable is set to
            the server's ``http://localhost:<port>`` url and echoed to stdout
        :return: the server, or ``None`` when ``port`` is negative
        """
        if port < 0:
            return None

        server = GeventServer(func(), port, handler_class=ws_handler_class)

        if env_var_name:
            # use the server's own port attribute, not the requested one
            url = 'http://localhost:{0}'.format(server.port)
            os.environ[env_var_name] = url
            print(env_var_name + '=' + url, flush=True)

        return server

    def stop_server(self, serv):
        """Call ``serv.stop()`` unless ``serv`` is falsy (e.g. never started)."""
        if not serv:
            return
        serv.stop()
Example #9
0
                    recording.set_derivs_recording(derivs_recording)

                derivs_rec = derivs_recording.my_id

            crawl_def = SEARCH_CRAWL_DEF.copy()
            crawl_def['coll'] = crawl_def['screenshot_coll'] = crawl_def[
                'text_coll'] = data['coll']
            crawl_def['user_params'] = {
                'user': data['user'],
                'coll': data['coll'],
                'coll_name': data['coll_name'],
                'rec': derivs_rec,
                'type': 'replay-coll',
                # updated later
                'request_ts': '',
                'browser': BROWSER
            }
            crawl_def['name'] = 'text-' + data['user'] + '-' + data['coll']
            crawl_def['seed_urls'] = data['pages']

            print(crawl_def)

            r = requests.post(self.browsertrix_url, json=crawl_def)
            print(r.text)


# =============================================================================
if __name__ == "__main__":
    # Script entry point: build a Worker around SearchAutomation and run it.
    # NOTE(review): the meaning of the second Worker argument (120) is not
    # visible here; presumably an interval in seconds -- confirm in Worker.
    from webrecorder.rec.worker import Worker
    worker = Worker(SearchAutomation, 120)
    worker.run()