# TODO might be a good idea to merge across multiple takeouts...
# perhaps even a special takeout module that deals with all of this automatically?
# e.g. accumulate, filter and maybe report useless takeouts?

REQUIRES = ["cssselect", "lxml"]

from itertools import chain
from typing import Set

from my.core.common import Stats, LazyLogger, mcachew, warn_if_empty
from my.core.cachew import cache_dir

from .paths import takeout_input_directories
from .takeout_parser import Results, parse_takeout

logger = LazyLogger(__name__, level="warning")


@mcachew(
    cache_path=lambda: str(cache_dir() / "_merged_google_events"),
    # sorted() already returns a fresh list, so no extra list() wrapper is needed
    depends_on=lambda: sorted(takeout_input_directories()),
    force_file=True,
    logger=logger,
)
def events() -> Results:
    """
    Yield the events parsed from every takeout input directory.

    Each directory returned by takeout_input_directories() is passed through
    parse_takeout, and the per-directory results are handed to merge_events.

    The result is wrapped by mcachew; its depends_on value is the sorted list
    of input directories (presumably so the cache is recomputed when that list
    changes -- confirm against the cachew documentation).
    """
    yield from merge_events(*map(parse_takeout, takeout_input_directories()))


@warn_if_empty
def merge_events(*sources: Results) -> Results:
    emitted: Set[int] = set()
""" Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google """ REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] from typing import Iterator from my.google.takeout.parser import events, _cachew_depends_on from google_takeout_parser.models import Location as GoogleLocation from my.core.common import mcachew, LazyLogger, Stats from .common import Location logger = LazyLogger(__name__) @mcachew( depends_on=_cachew_depends_on, logger=logger, ) def locations() -> Iterator[Location]: for g in events(): if isinstance(g, GoogleLocation): yield Location(lon=g.lng, lat=g.lat, dt=g.dt, accuracy=g.accuracy, elevation=None)
# if the accuracy for the location is more than 5km, don't use
require_accuracy: float = 5_000


from collections import Counter
from datetime import date, datetime
from functools import lru_cache
from itertools import groupby
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable

from more_itertools import seekable
import pytz

from my.core.common import LazyLogger, mcachew, tzdatetime

logger = LazyLogger(__name__, level='warning')


@lru_cache(2)
def _timezone_finder(fast: bool) -> Any:
    """
    Build an in-memory timezone finder; lru_cache keeps one per value of `fast`.

    fast=True selects TimezoneFinderL (less precise, but faster);
    fast=False selects the regular TimezoneFinder.
    """
    # timezonefinder is imported lazily, only once a finder is actually requested
    if fast:
        # less precise, but faster
        from timezonefinder import TimezoneFinderL  # type: ignore
        return TimezoneFinderL(in_memory=True)

    from timezonefinder import TimezoneFinder  # type: ignore
    return TimezoneFinder(in_memory=True)


# todo move to common?
Zone = str
from rexport import dal except ModuleNotFoundError as e: from my.core.compat import pre_pip_dal_handler dal = pre_pip_dal_handler('rexport', e, config, requires=REQUIRES) # TODO ugh. this would import too early # but on the other hand we do want to bring the objects into the scope for easier imports, etc. ugh! # ok, fair enough I suppose. It makes sense to configure something before using it. can always figure it out later.. # maybe, the config could dynamically detect change and reimport itself? dunno. ### ############################ from typing import List, Sequence, Mapping, Iterator, Any from my.core.common import mcachew, get_files, LazyLogger, make_dict, Stats logger = LazyLogger(__name__, level='info') from pathlib import Path def inputs() -> Sequence[Path]: return get_files(config.export_path) Uid = dal.Sid # str Save = dal.Save Comment = dal.Comment Submission = dal.Submission Upvote = dal.Upvote