def test_concurrent_indexing(tmp_path: Path, execution_number) -> None:
    cfg_slow = tmp_path / 'config_slow.py'
    cfg_fast = tmp_path / 'config_fast.py'
    cfg = dedent(f'''
    OUTPUT_DIR = r'{tmp_path}'
    from promnesia.common import Source
    from promnesia.sources import demo
    SOURCES = [Source(demo.index, count=COUNT)]
    ''')
    cfg_slow.write_text(cfg.replace('COUNT', '100000'))
    cfg_fast.write_text(cfg.replace('COUNT', '100'))

    # init it first, to create the database
    # TODO ideally this shouldn't be necessary, but it's reasonable that people would already have the index --
    # otherwise it would fail at the db creation point, which is kinda annoying to work around
    # todo in principle can work around it the same way as in cachew, by having a loop around the PRAGMA WAL command?
    check_call(promnesia_bin('index', '--config', cfg_fast))

    # run the slow indexer in the background
    with Popen(promnesia_bin('index', '--config', cfg_slow)) as slow:
        while slow.poll() is None:
            # create a bunch of 'smaller' indexers running in parallel
            fasts = [
                Popen(promnesia_bin('index', '--config', cfg_fast))
                for _ in range(10)
            ]
            for fast in fasts:
                assert fast.wait() == 0, fast  # should succeed
        assert slow.poll() == 0, slow
@contextmanager  # needs `from contextlib import contextmanager`; wserver() is used via `with wserver() as helper`
def wserver(db: Optional[PathIsh] = None):  # TODO err, not sure what the return type should be... -> ContextManager[Helper]?
    port = str(next_port())
    cmd = [
        'serve',
        '--quiet',
        '--port', port,
        *([] if db is None else ['--db', str(db)]),
    ]
    with tmp_popen(promnesia_bin(*cmd)) as server:
        # wait till the server is ready
        st = f'http://localhost:{port}/status'
        for _ in range(50):
            try:
                requests.get(st).json()
                break
            except Exception:
                time.sleep(0.1)
        else:
            raise RuntimeError(f"Couldn't connect to '{st}' after 50 attempts")
        print(f"Started server up, db: {db}", file=sys.stderr)

        yield Helper(port=port)

        print("Done with the server", file=sys.stderr)
def test_demo() -> None:
    import requests
    with tmp_popen(promnesia_bin('demo', '--port', '16789', ox_hugo_data())):
        # FIXME why does it want POST??
        time.sleep(2)  # meh.. need a generic helper to wait till ready... (a sketch of one follows this test)
        res = {}
        for attempt in range(30):
            time.sleep(1)
            try:
                res = requests.post(
                    "http://localhost:16789/search",
                    data=dict(url="https://github.com/kaushalmodi/ox-hugo/issues"),
                ).json()
                break
            except Exception:
                continue
        else:
            raise RuntimeError("Couldn't connect to the server")

        vis = res['visits']
        assert len(vis) > 50, vis

        mds = [
            x for x in vis
            if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)
        ]
        orgs = [
            x for x in vis
            if x['locator']['title'].startswith('content-org/single-posts/empty_tag.org'.replace('/', os.sep))
        ]
        assert len(mds) == 1
        assert len(orgs) == 1
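# Not part of the original test suite: a minimal sketch of the 'generic helper to wait
# till ready' mentioned in the TODOs above. The name `wait_for_endpoint` and its
# parameters are hypothetical; it just polls a URL until it returns parseable JSON,
# the same way wserver() does inline with its retry loop.
def wait_for_endpoint(url: str, attempts: int = 50, delay: float = 0.1) -> None:
    import requests
    for _ in range(attempts):
        try:
            requests.get(url).json()  # any parseable response counts as 'ready'
            return
        except Exception:
            time.sleep(delay)
    raise RuntimeError(f"Couldn't connect to '{url}' after {attempts} attempts")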
def test_basic(tmp_path: Path) -> None:
    cfg = tmp_path / 'config.py'
    cfg.write_text("SOURCES = ['promnesia.sources.demo']")
    check_call(promnesia_bin('index', '--config', cfg))

    with wserver() as helper:
        response = post(f'http://localhost:{helper.port}/visits', 'url=whatever')
        assert response['visits'] == []
def test_basic(tmp_path: Path) -> None:
    cfg = tmp_path / 'config.py'  # TODO put in user home dir? annoying in test...
    cfg.write_text('''
SOURCES = ['promnesia.sources.demo']
''')
    check_call(promnesia_bin('index', '--config', cfg))

    with wserver() as helper:
        response = post(f'http://localhost:{helper.port}/visits', 'url=whatever')
        assert response['visits'] == []
def _test_helper(tmp_path):
    tdir = Path(tmp_path)
    cache_dir = tdir / 'cache'
    cache_dir.mkdir()

    # TODO extract that into index_takeout?
    # TODO ugh. quite hacky...
    template_config = tdata('test_config.py')
    copy(template_config, tdir)
    config = tdir / 'test_config.py'
    with config.open('a') as fo:
        fo.write(f"""
OUTPUT_DIR = '{tdir}'
CACHE_DIR = '{cache_dir}'
""")

    check_call(promnesia_bin('index', '--config', config))

    with wserver(db=tdir / 'promnesia.sqlite') as srv:
        yield srv
def test_query_while_indexing(tmp_path: Path) -> None:
    cfg = tmp_path / 'config.py'
    indexing_cmd = promnesia_bin('index', '--config', cfg)

    # just trigger the database creation
    cfg.write_text(dedent(f'''
    OUTPUT_DIR = r'{tmp_path}'
    SOURCES = ['promnesia.sources.demo']
    '''))
    check_call(indexing_cmd)

    cfg.write_text(dedent(f'''
    OUTPUT_DIR = r'{tmp_path}'
    from promnesia.common import Source
    from promnesia.sources import demo
    # index a stupid amount of visits to increase time spent in database serialization
    SOURCES = [Source(demo.index, count=100000)]
    '''))

    with wserver(db=tmp_path / 'promnesia.sqlite') as helper:
        status = lambda: post(f'http://localhost:{helper.port}/status')

        # precondition -- the db should be healthy
        r = status()
        assert 0 < r['stats']['total_visits'] < 100000, r

        # now run the indexing (asynchronously)
        # from subprocess import Popen
        with Popen(indexing_cmd):
            # and hammer the backend to increase the likelihood of a race condition
            # not ideal -- doesn't really 'guarantee' to catch races, but good enough
            for _ in range(100):
                r = status()
                assert r['stats'].get('total_visits', 0) > 0, r

        # after indexing has finished, the new visits should be in the db
        r = status()
        assert r['stats']['total_visits'] >= 100000, r