Example #1
0
File: __init__.py Project: tg-z/hpi
# TODO might be a good idea to merge across multiple takeouts...
# perhaps even a special takeout module that deals with all of this automatically?
# e.g. accumulate, filter and maybe report useless takeouts?

# Package names this module declares as requirements.
# NOTE(review): presumably consumed by the project's dependency tooling -- confirm.
REQUIRES = ["cssselect", "lxml"]

from itertools import chain
from typing import Set

from my.core.common import Stats, LazyLogger, mcachew, warn_if_empty
from my.core.cachew import cache_dir

from .paths import takeout_input_directories
from .takeout_parser import Results, parse_takeout

# Module-level logger, named after this module and created with level="warning".
logger = LazyLogger(__name__, level="warning")


@mcachew(
    # Cache file lives under the shared cache directory.
    cache_path=lambda: str(cache_dir() / "_merged_google_events"),
    # sorted() already returns a new list, so no extra list() wrapper is needed.
    # NOTE(review): presumably the cache is invalidated when this value changes,
    # i.e. when the set of takeout input directories changes -- confirm.
    depends_on=lambda: sorted(takeout_input_directories()),
    force_file=True,
    logger=logger,
)
def events() -> Results:
    """Yield the events of every takeout input directory, merged together.

    Each directory returned by ``takeout_input_directories()`` is passed to
    ``parse_takeout``; the per-directory results are then handed to
    ``merge_events`` and its output is yielded unchanged.
    """
    per_takeout = (parse_takeout(path) for path in takeout_input_directories())
    yield from merge_events(*per_takeout)


@warn_if_empty
def merge_events(*sources: Results) -> Results:
    emitted: Set[int] = set()
Example #2
0
"""
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
"""

# Requirement spec for the google_takeout_parser package, given as a git URL.
# NOTE(review): presumably consumed by the project's dependency tooling -- confirm.
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]

from typing import Iterator

from my.google.takeout.parser import events, _cachew_depends_on
from google_takeout_parser.models import Location as GoogleLocation

from my.core.common import mcachew, LazyLogger, Stats
from .common import Location

# Module-level logger, named after this module (no explicit level given).
logger = LazyLogger(__name__)


@mcachew(
    depends_on=_cachew_depends_on,
    logger=logger,
)
def locations() -> Iterator[Location]:
    """Yield a ``Location`` for each ``GoogleLocation`` among the takeout events.

    Events of any other type are ignored. Elevation is always ``None``
    because it is never read from the source event.
    """
    google_only = (ev for ev in events() if isinstance(ev, GoogleLocation))
    for entry in google_only:
        # GoogleLocation names the longitude ``lng``; Location calls it ``lon``.
        yield Location(
            lon=entry.lng,
            lat=entry.lat,
            dt=entry.dt,
            accuracy=entry.accuracy,
            elevation=None,
        )

Example #3
0
    # Accuracy cutoff: a location whose accuracy value is greater than this is
    # not used. The default of 5_000 corresponds to 5km, so the unit is
    # presumably metres -- confirm against the code that reads this setting.
    require_accuracy: float = 5_000


from collections import Counter
from datetime import date, datetime
from functools import lru_cache
from itertools import groupby
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable

from more_itertools import seekable
import pytz

from my.core.common import LazyLogger, mcachew, tzdatetime

# Module-level logger, named after this module and created with level='warning'.
logger = LazyLogger(__name__, level='warning')


@lru_cache(maxsize=2)
def _timezone_finder(fast: bool) -> Any:
    """Return a timezonefinder instance, cached per value of ``fast``.

    ``fast=True`` selects ``TimezoneFinderL`` (less precise, but faster);
    ``fast=False`` selects ``TimezoneFinder``. Either way the finder is built
    with ``in_memory=True``. The cache holds two entries, one per flag value.
    """
    # Imported lazily so the dependency is only needed once a finder is requested.
    import timezonefinder  # type: ignore

    finder_cls = timezonefinder.TimezoneFinderL if fast else timezonefinder.TimezoneFinder
    return finder_cls(in_memory=True)


# Type alias: a Zone is a plain string.
# NOTE(review): presumably holds a timezone name -- confirm against usage.
# todo move to common?
Zone = str
Example #4
0
    from rexport import dal
except ModuleNotFoundError as e:
    from my.core.compat import pre_pip_dal_handler
    dal = pre_pip_dal_handler('rexport', e, config, requires=REQUIRES)
# TODO ugh. this would import too early
# but on the other hand we do want to bring the objects into the scope for easier imports, etc. ugh!
# ok, fair enough I suppose. It makes sense to configure something before using it. can always figure it out later..
# maybe, the config could dynamically detect change and reimport itself? dunno.
###

############################

from typing import List, Sequence, Mapping, Iterator, Any
from my.core.common import mcachew, get_files, LazyLogger, make_dict, Stats

# Module-level logger, named after this module and created with level='info'.
logger = LazyLogger(__name__, level='info')

from pathlib import Path


def inputs() -> Sequence[Path]:
    """Return the paths that ``get_files`` finds for ``config.export_path``."""
    export_location = config.export_path
    return get_files(export_location)


# Bring the types provided by ``dal`` into this module's namespace, so that
# callers can import them from here. ``Uid`` is the only one that is renamed
# (it aliases ``dal.Sid``).
Uid = dal.Sid  # str
Save = dal.Save
Comment = dal.Comment
Submission = dal.Submission
Upvote = dal.Upvote