Example #1
0
def test_parser(path: str) -> None:
    """Smoke-test ``read_html`` on one file of a takeout.

    ``path`` is given relative to the ``Takeout/`` directory; the prefix is
    added here before the lookup.  The test only requires that
    ``get_last_takeout`` finds something and that ``read_html`` can be fully
    consumed; the number of parsed entries is printed, not checked.
    """
    takeout_relpath = 'Takeout/' + path
    tpath = get_last_takeout(path=takeout_relpath)
    assert tpath is not None
    # Materialise the whole iterable so any parse error surfaces in this test.
    entries = [*read_html(tpath, takeout_relpath)]
    # TODO assert len > 100 or something?
    print(len(entries))
Example #2
0
def test_myactivity_search() -> None:
    """Check that a known entry is parsed from the Search activity page.

    Looks up ``Takeout/My Activity/Search/MyActivity.html`` via
    ``get_last_takeout``, parses it with ``read_html`` and asserts that one
    specific ``(datetime, url, title)`` tuple is among the results.
    """
    path = 'Takeout/My Activity/Search/MyActivity.html'
    tpath = get_last_takeout(path=path)
    # Same guard as test_parser: fail here with a clear message instead of
    # letting a missing takeout blow up somewhere inside read_html.
    assert tpath is not None, f'no takeout containing {path} was found'
    results = list(read_html(tpath, path))

    # Expected entry, compared by plain tuple equality against each result.
    res = (
        datetime(year=2018, month=12, day=17, hour=8, minute=16, second=18, tzinfo=pytz.utc),
        'https://en.wikipedia.org/wiki/Emmy_Noether&usg=AFQjCNGrSW-iDnVA2OTcLsG3I80H_a6y_Q',
        'Emmy Noether - Wikipedia',
    )
    assert res in results
Example #3
0
def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
    """Yield one ``Visit`` per entry of a "My Activity" HTML page in ``takeout``.

    ``kind`` is the path of the page below ``Takeout/My Activity/``.  If
    ``kexists`` reports that the page is not in the takeout, a warning is
    logged and nothing is yielded.  The title returned by ``read_html`` is
    not used; ``kind`` is stored in the visit's ``debug`` field.
    """
    from my.core.kompress import kexists

    # TODO glob
    # TODO not sure about windows path separators??
    activity_path = 'Takeout/My Activity/' + kind
    present = kexists(takeout, activity_path)
    if not present:
        logger.warning(f"{activity_path} is not present in {takeout}... skipping")
        return

    logger.info('processing %s %s', takeout, kind)
    # One locator for the whole page, shared by every visit yielded below.
    page_locator = Loc.file(activity_path)

    # Imported only after the existence check, so a missing page never
    # triggers this import.
    from my.google.takeout.html import read_html

    yield from (
        Visit(url=url, dt=dt, locator=page_locator, debug=kind)
        for dt, url, _title in read_html(takeout, activity_path)
    )