def test_parser(path: str) -> None:
    """Smoke-test ``read_html`` on one file of the takeout.

    ``path`` is given relative to ``Takeout/``; the takeout to read is the
    one returned by ``get_last_takeout`` for the prefixed path. The test only
    checks that parsing completes and prints how many items were produced.
    """
    full_path = 'Takeout/' + path
    takeout = get_last_takeout(path=full_path)
    assert takeout is not None

    entries = list(read_html(takeout, full_path))
    # TODO assert len > 100 or something?
    count = len(entries)
    print(count)
def test_myactivity_search() -> None:
    """Check that a known entry is parsed from the Search activity HTML.

    Reads ``Takeout/My Activity/Search/MyActivity.html`` from the takeout
    returned by ``get_last_takeout`` and asserts that one specific
    ``(datetime, url, title)`` tuple is among the items produced by
    ``read_html``.
    """
    path = 'Takeout/My Activity/Search/MyActivity.html'
    tpath = get_last_takeout(path=path)
    # Same guard as test_parser: fail here with a clear assertion rather than
    # handing a missing takeout on to read_html.
    assert tpath is not None
    results = list(read_html(tpath, path))

    expected = (
        datetime(year=2018, month=12, day=17, hour=8, minute=16, second=18, tzinfo=pytz.utc),
        'https://en.wikipedia.org/wiki/Emmy_Noether&usg=AFQjCNGrSW-iDnVA2OTcLsG3I80H_a6y_Q',
        'Emmy Noether - Wikipedia',
    )
    assert expected in results
def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
    """Yield a ``Visit`` for each item ``read_html`` produces for one My Activity file.

    ``kind`` is the file's path relative to ``Takeout/My Activity/``. If that
    path is not present in ``takeout`` (per ``kexists``), a warning is logged
    and nothing is yielded. The title of each item is not used.
    """
    from my.core.kompress import kexists

    # TODO glob
    # TODO not sure about windows path separators??
    spath = 'Takeout/My Activity/' + kind
    present = kexists(takeout, spath)
    if not present:
        logger.warning(f"{spath} is not present in {takeout}... skipping")
        return

    logger.info('processing %s %s', takeout, kind)

    # Every visit from this file shares the same locator.
    file_loc = Loc.file(spath)
    from my.google.takeout.html import read_html

    for when, link, _title in read_html(takeout, spath):
        visit = Visit(url=link, dt=when, locator=file_loc, debug=kind)
        yield visit