Ejemplo n.º 1
0
def inputs() -> Iterable[Tuple[datetime, Path]]:
    """Pair each file found for ``config.export_path`` with a timestamp.

    The timestamp is parsed from the portion of the file stem that precedes
    the first ``-``; that portion must match ``%Y%m%dT%H%M%SZ``, otherwise
    ``datetime.strptime`` raises ``ValueError``.

    Returns:
        A list of ``(timestamp, path)`` tuples, in the order ``get_files``
        produced the paths.
    """
    stamp_format = "%Y%m%dT%H%M%SZ"
    return [
        (datetime.strptime(path.stem.split("-")[0], stamp_format), path)
        for path in get_files(config.export_path)
    ]
Ejemplo n.º 2
0
Archivo: browsing.py Proyecto: tg-z/hpi
def _live_visits() -> List[Visit]:
    """Read the visits out of every database found for ``config.live_databases``.

    Each database is opened through ``sqlite_copy_and_open`` and its
    connection is closed again before the next one is processed.

    Returns:
        All visits from all databases, concatenated in database order.
    """
    # an unset/falsy config value is passed to get_files as an empty string
    databases = get_files(config.live_databases or "")
    logger.debug(f"Live databases: {databases}")

    collected: List[Visit] = []
    for database in databases:
        connection = sqlite_copy_and_open(database)
        try:
            # drain the generator into a list while the connection is open;
            # once the connection is closed the visits can no longer be read
            collected += list(read_visits(connection))
        finally:
            connection.close()

    logger.debug(f"Read {len(collected)} live visits")
    return collected
Ejemplo n.º 3
0
Archivo: albums.py Proyecto: tg-z/hpi
def _current_albums_export_path() -> Path:
    """Return the first path ``get_files`` yields for ``config.export_path``.

    Raises:
        IndexError: if ``get_files`` returns an empty sequence.
    """
    candidates = get_files(config.export_path)
    return candidates[0]
Ejemplo n.º 4
0
Archivo: league.py Proyecto: tg-z/hpi
def inputs() -> Sequence[Path]:
    """
    Locate the parsed League of Legends JSON files under ``config.export_path``
    (the output of https://github.com/seanbreckenridge/lolexport/blob/master/lolexport/parse.py)
    """
    export_files = get_files(config.export_path)
    return export_files
Ejemplo n.º 5
0
def inputs() -> Iterable[Path]:
    """Lazily yield every path ``get_files`` finds for ``config.export_path``."""
    for path in get_files(config.export_path):
        yield path
Ejemplo n.º 6
0
Archivo: facebook.py Proyecto: tg-z/hpi
def events() -> Results:
    """Walk the Facebook GDPR export and yield whatever the handlers produce.

    Every path found recursively below the ``get_files(config.gdpr_dir)``
    results is compared against the prefixes in ``handler_map`` (joined onto
    the absolute export directory). The first prefix that matches, in
    insertion order, decides what happens:

    * handler is ``None``: the path is skipped on purpose;
    * handler is a callable: if the path ends in ``.json`` it is loaded and
      everything ``handler(parsed_json)`` yields is re-yielded.

    A non-directory path that matches no prefix is reported by yielding (not
    raising) a ``RuntimeError``; unmatched directories are skipped silently.
    """
    # absolute, user-expanded export root that the prefixes are joined onto
    gdpr_dir = str(Path(config.gdpr_dir).expanduser().absolute())
    # every file and directory below each path returned by get_files
    files = chain(*(root.rglob("*") for root in get_files(config.gdpr_dir)))

    # NOTE: order matters. The first matching prefix wins, so a more specific
    # prefix must come before the broader one that would also match it
    # (e.g. "messages/stickers_used" before "messages/").
    handler_map = {
        "about_you/face_recog": None,
        "about_you/friend_peer": None,
        "about_you/your_address_books": _parse_address_book,
        "ads": None,
        "apps_and_websites/apps_and_websites": _parse_installed_apps,
        "apps_and_websites/posts_from_apps_and_websites": _parse_app_posts,
        "comments/comments": _parse_group_comments,
        "events/event_invitations": None,  # only the accepted ones get parsed
        "events/your_event_responses": _parse_joined_events,
        "following_and": None,  # no data in my export
        "friends/friends": _parse_friends,
        "friends/received_friend_requests": None,  # not interested
        "friends/rejected_friend": None,  # not interested
        "friends/sent_friend": None,  # not interested
        "friends/removed_": _parse_deleted_friends,
        "groups/your_group_membership": _parse_group_activity,
        "groups/your_posts_and_comments": _parse_group_posts,
        "likes_and_reactions/pages": _parse_page_likes,
        "likes_and_reactions/posts_and_comments": _parse_reactions,
        "location": None,  # no data
        "marketplace": None,
        "other_activity": None,
        "pages": None,
        "payment_history": None,
        "photos_and_videos/album": _parse_photo_ips,  # ip info for where images were uploaded from
        "photos_and_videos/": None,  # these are pulled out in my/photos.py
        "profile_information/profile_information.json": None,
        "saved_items": None,
        "stories": None,
        "your_places": None,
        "posts/your_posts": _parse_posts,
        "search_history": _parse_search_history,
        "profile_information/profile_update_history": _parse_posts,
        "messages/stickers_used": None,  # no one needs stickers o_o
        "messages/": _parse_conversation,
        "security_and_login_information/account_activity": _parse_account_activity,
        "security_and_login_information/authorized_logins": _parse_authorized_logins,
        "security_and_login_information/administrative_records": _parse_admin_records,
        "security_and_login_information/where_you": None,
        "security_and_login_information/used_ip_addresses": None,
        "security_and_login_information/account_status_changes": None,
        "security_and_login_information/logins_and_logouts": None,
        "security_and_login_information/login_protection": None,
        "security_and_login_information/datr_cookie": None,
        "posts/other_people's_posts_to_your_timeline": None,  # maybe implement this? OtherComment NamedTuple? Comment should just be mine
    }

    for f in files:
        location = str(f)
        handler: Any = None
        matched = False
        for prefix, candidate in handler_map.items():
            if location.startswith(os.path.join(gdpr_dir, prefix)):
                handler = candidate
                matched = True
                break

        if not matched:
            # rglob("*") also returns directories (and the subdirectories/json
            # files inside them). Unmatched directories are therefore expected,
            # mostly because of the deeper layout of the messages dir; the
            # json files inside them are still visited on their own.
            if not f.is_dir():
                e = RuntimeError(f"Unhandled file: {f}")
                logger.debug(str(e))
                yield e
            continue

        # None means explicitly ignored; anything that is not json is skipped
        if handler is None or f.suffix != ".json":
            continue

        yield from handler(json.loads(f.read_text()))
Ejemplo n.º 7
0
def project_euler_inputs() -> Sequence[Path]:
    """Return the paths ``get_files`` finds for ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
Ejemplo n.º 8
0
Archivo: battery.py Proyecto: tg-z/hpi
def inputs() -> Sequence[Path]:  # type: ignore[misc]
    """Lazily yield all battery log/datafiles found for ``config.export_path``.

    Despite the annotation this is a generator function, which is why the
    signature carries a ``type: ignore``.
    """
    for datafile in get_files(config.export_path):
        yield datafile
Ejemplo n.º 9
0
def inputs() -> Sequence[Path]:  # type: ignore[misc]
    """Lazily yield each path ``get_files`` finds for ``config.export_path``.

    Despite the annotation this is a generator function, which is why the
    signature carries a ``type: ignore``.
    """
    for path in get_files(config.export_path):
        yield path
Ejemplo n.º 10
0
def _latest_input() -> Path:
    """Pick the export file with the newest modification time.

    Since the exports are complete exports, only the most recent one is needed.
    """

    def modified_at(path: Path) -> float:
        return path.stat().st_mtime

    oldest_to_newest = sorted(get_files(config.export_path), key=modified_at)
    return last(oldest_to_newest)
Ejemplo n.º 11
0
def inputs() -> Sequence[Path]:
    """Return the PDFs matched under ``config.paths`` that are not ignored.

    Files are matched with the ``**/*.pdf`` glob; a file is dropped when
    ``config.is_ignored`` returns a truthy value for it.
    """
    # TODO ignoring could be handled on get_files/user config site as well?..
    kept = []
    for pdf in get_files(config.paths, glob='**/*.pdf'):
        if config.is_ignored(pdf):
            continue
        kept.append(pdf)
    return kept
Ejemplo n.º 12
0
Archivo: paths.py Proyecto: tg-z/hpi
def takeout_input_directories() -> Iterator[Path]:
    """Yield each path ``get_files`` finds for ``config.takeout_path``.

    ``check_for_new_takeouts`` is called first. Because this is a generator,
    that call happens when iteration starts, not when the function is called.
    """
    check_for_new_takeouts()
    for takeout in get_files(config.takeout_path):
        yield takeout
Ejemplo n.º 13
0
def inputs() -> Sequence[Path]:
    """Return the PDFs matched under ``config.paths`` that are not ignored.

    Files are matched with the ``**/*.pdf`` glob; a file is dropped when
    ``config.is_ignored`` returns a truthy value for it.
    """
    candidates = get_files(config.paths, glob='**/*.pdf')
    return list(filter(lambda pdf: not config.is_ignored(pdf), candidates))
Ejemplo n.º 14
0
def mailboxes() -> List[Path]:
    """Return a new list of the paths ``get_files`` finds for ``config.mailboxes``."""
    return [*get_files(config.mailboxes)]
Ejemplo n.º 15
0
def inputs() -> Sequence[Path]:
    """Return the paths ``get_files`` finds for ``user_config.paths``."""
    found = get_files(user_config.paths)
    return found
Ejemplo n.º 16
0
Archivo: zsh.py Proyecto: tg-z/hpi
def backup_inputs() -> Sequence[Path]:
    """Return a new list of the paths ``get_files`` finds for ``config.export_path``."""
    return [*get_files(config.export_path)]
Ejemplo n.º 17
0
Archivo: spotify.py Proyecto: tg-z/hpi
def inputs(gdpr_dir: Optional[PathIsh] = None) -> Sequence[Path]:
    """Return the ``*.json`` files found in the GDPR export directory.

    Args:
        gdpr_dir: directory to search; when ``None``, ``config.gdpr_dir`` is
            used instead.

    Returns:
        Whatever ``get_files`` returns for the user-expanded, absolute
        directory with the ``*.json`` glob.
    """
    chosen: PathIsh
    if gdpr_dir is None:
        chosen = config.gdpr_dir
    else:
        chosen = gdpr_dir
    export_root = Path(chosen).expanduser().absolute()
    return get_files(export_root, glob="*.json")
Ejemplo n.º 18
0
def accounts() -> Sequence[Path]:
    """Collect every match ``match_structure`` reports for the export files.

    Each path found for ``config.export_path`` is checked against
    ``EXPECTED``; the matches are gathered into one flat list.
    """
    found = []
    for export in get_files(config.export_path):
        with match_structure(export, EXPECTED) as matches:
            # copy the matches out before the context manager exits
            found += list(matches)
    return found
Ejemplo n.º 19
0
def inputs() -> Sequence[Path]:
    """Return the paths ``get_files`` finds for ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
Ejemplo n.º 20
0
Archivo: trakt.py Proyecto: tg-z/hpi
def _latest_input() -> Path:
    """Pick the export file whose path sorts last.

    Since the exports are complete exports, only the most recent one is needed.
    """
    ordered = sorted(get_files(config.export_path))
    return last(ordered)