예제 #1
0
def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
    """Yield a ``Visit`` for every entry of the Chrome history JSON in a takeout.

    The file ``Takeout/Chrome/BrowserHistory.json`` is looked up inside
    ``takeout`` with ``kexists`` and opened with ``kopen``. When it is not
    present, a warning is logged and nothing is yielded.

    Every item of the top-level ``'Browser History'`` list produces one visit
    built from the item's ``'url'`` and ``'time_usec'`` fields; all visits share
    a single locator created from ``takeout``.
    """
    # TODO replace with my.core.kompress after hpi update (or even use some my. function directly?)
    from my.kython.kompress import kexists, kopen
    # not sure if this deserves moving to HPI? it's pretty trivial for now
    spath = 'Takeout/Chrome/BrowserHistory.json'

    if not kexists(takeout, spath):
        logger.warning(f"{spath} is not present in {takeout}... skipping")
        return
    logger.info('processing %s %s', takeout, spath)

    # TODO could also add spath?
    locator = Loc.file(takeout)

    # TODO this should be supported by HPI now?
    # The whole document is loaded at once; the file handle is closed before iterating.
    with kopen(takeout, spath) as fo:  # TODO iterative parser?
        j = json.load(fo)

    for item in j['Browser History']:
        url = item['url']
        # 'time_usec' is divided by 10**6 to get seconds since the Unix epoch.
        # fromtimestamp(..., tz=...) builds the UTC-aware value directly, replacing
        # the deprecated utcfromtimestamp(...).replace(tzinfo=...) combination.
        dt = datetime.fromtimestamp(item['time_usec'] / 10**6, tz=pytz.utc)
        # TODO any more interesting info?
        yield Visit(
            url=url,
            dt=dt,
            locator=locator,
            debug='Chrome/BrowserHistory.json',
        )
예제 #2
0
def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
    """Yield a ``Visit`` for each entry of one "My Activity" HTML export.

    ``kind`` is appended to ``'Takeout/My Activity/'`` to form the path that is
    looked up inside ``takeout``. If ``kexists`` does not find that path, a
    warning is logged and the generator finishes without yielding anything.
    Otherwise every ``(dt, url, title)`` triple produced by ``read_html``
    becomes a visit; the title is not used.
    """
    # TODO replace with my.core.kompress after hpi update (or even use some my. function directly?)
    from my.kython.kompress import kexists
    # TODO glob
    # TODO not sure about windows path separators??
    activity_path = 'Takeout/My Activity/' + kind

    if kexists(takeout, activity_path):
        logger.info('processing %s %s', takeout, kind)

        # one locator object is shared by every visit from this export
        shared_locator = Loc.file(activity_path)
        from my.google.takeout.html import read_html
        for visited_at, visited_url, _title in read_html(takeout, activity_path):
            yield Visit(
                url=visited_url,
                dt=visited_at,
                locator=shared_locator,
                debug=kind,
            )
    else:
        logger.warning(f"{activity_path} is not present in {takeout}... skipping")
예제 #3
0
def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
    """Yield a ``Visit`` for each entry of one "My Activity" HTML export.

    ``kind`` is appended to ``'Takeout/My Activity/'`` to form the path that is
    looked up inside ``takeout``. If ``kexists`` does not find that path, a
    warning is logged and the generator finishes without yielding anything.
    Otherwise every ``(dt, url, title)`` triple produced by ``read_html``
    becomes a visit; the title is not used.
    """
    from my.kython.kompress import kexists
    logger = get_logger()
    # TODO glob
    # TODO not sure about windows path separators??
    spath = 'Takeout/My Activity/' + kind
    if not kexists(takeout, spath):
        logger.warning(f"{spath} is not present in {takeout}... skipping")
        # Bare return: this function is a generator, so a returned list would
        # never be seen by callers that iterate over it.
        return
    logger.info('processing %s %s', takeout, kind)

    # one locator object is shared by every visit from this export
    locator = Loc.file(spath)
    from my.google.takeout.html import read_html
    for dt, url, _title in read_html(takeout, spath):
        yield Visit(
            url=url,
            dt=dt,
            locator=locator,
            debug=kind,
        )
예제 #4
0
파일: misc.py 프로젝트: obswork/HPI-1
def test_kexists(tmp_path: Path) -> None:
    """Check ``kexists`` for present and absent members, and for a missing archive.

    NOTE(review): ``file.zip`` is presumably created under ``tmp_path`` by a
    fixture that is not visible here -- confirm.
    """
    # the existing archive is passed as a plain string path
    existing_archive = str(tmp_path / 'file.zip')
    assert kexists(existing_archive, 'path/in/archive')
    assert not kexists(existing_archive, 'path/notin/archive')

    # TODO not sure about this?
    # the missing archive is passed as a Path object rather than a string
    missing_archive = tmp_path / 'nosuchzip.zip'
    assert not kexists(missing_archive, 'path/in/archive')