Beispiel #1
0
def merge(src, dst, mode="no_move"):
    """Move the contents of directory ``src`` into directory ``dst``.

    Every entry found by ``src.rglob("*")`` is handled in turn:

    * directories are re-created under ``dst`` (existing ones are kept),
    * files are renamed onto the matching location under ``dst``.

    When the destination file already exists, ``mode`` decides what happens:

    * ``"fail"``      -- raise ``FileExistsError`` for the source file,
    * ``"no_move"``   -- leave the source file where it is,
    * ``"overwrite"`` -- call ``replace`` so the source takes the target's place,
    * ``"rename"``    -- delegate to ``move_rename(path, target)``.

    Afterwards ``remove_empty_dirs`` is run on ``src`` with
    ``remove_empty_error_log`` as its error callback.

    Raises:
        ValueError: if ``src`` or ``dst`` is not a directory.
        FileExistsError: in ``"fail"`` mode when a target already exists.
        RuntimeError: for an entry that is neither a directory nor a file.
    """

    assert_choice("mode", mode, MODES)

    if not (src.is_dir() and dst.is_dir()):
        raise ValueError("src and dst must be directories")

    for entry in src.rglob("*"):

        if entry.is_dir():
            # Mirror the directory structure; nothing is moved for directories.
            mirrored = dst / entry.relative_to(src)
            mirrored.mkdir(parents=True, exist_ok=True)
            continue

        if not entry.is_file():
            raise RuntimeError("Unhandled file: {}".format(entry))

        relative = entry.relative_to(src)
        (dst / relative.parent).mkdir(parents=True, exist_ok=True)
        destination = dst / relative

        if not destination.exists():
            # NOTE: check-then-rename is racy on Linux; renameat2 with
            # RENAME_NOREPLACE might be an alternative.
            entry.rename(destination)
            continue

        # The destination is already occupied: resolve according to `mode`.
        if mode == "fail":
            raise FileExistsError(fspath(entry))
        if mode == "overwrite":
            replace(fspath(entry), fspath(destination))
        elif mode == "rename":
            move_rename(entry, destination)
        # mode == "no_move": intentionally leave the source file untouched.

    remove_empty_dirs(fspath(src), onerror=remove_empty_error_log)
Beispiel #2
0
    def get_paths(self,
                  field: str,
                  token: str,
                  scoring: str = "unscored") -> Iterable[OptionalSearchResult]:
        """Lazily produce ``(entry, score)`` pairs for one token of a field.

        The documents are fetched with ``self.get_docs(field, token)`` and
        each document id is resolved through ``self.ids2docs``. The score
        depends on ``scoring``:

        * ``"unscored"``  -- the enumeration index of the document id,
        * ``"term_freq"`` -- the value stored for the document id,
        * ``"tfidf"``     -- ``self._tfidf(term_freq, inv_doc_freq)`` where the
          inverse document frequency is ``log10(len(self.table[field]) /
          len(docs))``.

        The result is a generator expression; nothing is looked up in
        ``self.ids2docs`` until it is iterated.
        """

        assert_choice("scoring", scoring, SCORING_METHODS)

        postings = self.get_docs(field, token)
        paths: Iterable[OptionalSearchResult]

        if scoring == "unscored":
            paths = ((self.ids2docs[key], position)
                     for position, key in enumerate(postings.keys()))
        elif scoring == "term_freq":
            paths = ((self.ids2docs[key], freq)
                     for key, freq in postings.items())
        elif scoring == "tfidf":
            if postings:
                # Only computable when at least one document matched; with
                # no matches the generator below never reads `idf`.
                total = len(self.table[field])
                idf = log10(total / len(postings))

            paths = ((self.ids2docs[key], self._tfidf(freq, idf))
                     for key, freq in postings.items())

        return paths
Beispiel #3
0
    def _sorted(self,
                groupname: str,
                paths: Iterable[OptionalSearchResult],
                sortby="path") -> list[SearchResult]:
        """Filter search results to one group and return them sorted.

        Only ``(path, score)`` pairs whose path (as a string via ``fspath``)
        starts with one of the stringified entries of
        ``self.groups[groupname]`` are kept.

        Args:
            groupname: key into ``self.groups``.
            paths: iterable of ``(path, score)`` pairs.
            sortby: ``"path"`` sorts ascending by path, ``"score"`` sorts
                descending by score.

        Raises:
            RuntimeError: if filtering raises ``TypeError`` (for example when
                a path cannot be converted by ``fspath``).
        """
        assert_choice("sortby", sortby, SORTBY_METHODS)

        # str.startswith accepts a tuple of prefixes, so one call per path
        # replaces the inner any(...) loop.
        prefixes = tuple(map(str, self.groups[groupname]))
        try:
            # Materialise the filter here: a lazy generator would only raise
            # once sorted() consumes it, i.e. outside this try block, and the
            # TypeError would escape unconverted.
            filtered_paths = cast(
                Iterable[SearchResult],
                [
                    (path, score) for path, score in paths
                    if fspath(path).startswith(prefixes)  # type: ignore[arg-type]
                ],
            )
        except TypeError as e:
            raise RuntimeError(e) from e

        if sortby == "path":
            return sorted(filtered_paths, key=itemgetter(0), reverse=False)
        elif sortby == "score":
            return sorted(filtered_paths, key=itemgetter(1), reverse=True)
        else:
            # Unreachable when SORTBY_METHODS matches the branches above; an
            # explicit raise (unlike `assert False`) also survives `python -O`.
            raise AssertionError("unhandled sortby: {!r}".format(sortby))
Beispiel #4
0
    def get_episodes_by_bohne_preview(self, bohne_id, order="ASC"):
        # type: (int, str) -> Dict[str, Any]
        """Return reduced information about all episodes for the given Bohne.

        Args:
            bohne_id: id inserted into the request path.
            order: ``"ASC"`` or ``"DESC"``; validated and forwarded to the
                request as the ``order`` parameter.

        Returns:
            Whatever ``self._request_single`` returns for the endpoint.
        """

        assert_choice("order", order, {"ASC", "DESC"})
        # `order` used to be validated but then dropped; forward it like the
        # sibling episode methods do so the argument actually has an effect.
        return self._request_single(
            "/v1/media/episode/bybohne/preview/{}".format(bohne_id),
            order=order)
Beispiel #5
0
    def get_newest_episodes_preview(self, order="ASC"):
        # type: (str, ) -> Iterator[Dict[str, Any]]
        """Request the newest-episodes preview endpoint via ``_request_paged``.

        ``order`` must be ``"ASC"`` or ``"DESC"`` and is forwarded to the
        request. The positional ``50`` and ``False`` are passed through
        unchanged (presumably page size and a flag -- see ``_request_paged``).
        """

        assert_choice("order", order, {"ASC", "DESC"})
        endpoint = "/v1/media/episode/preview/newest"
        return self._request_paged(endpoint, 50, False, order=order)
Beispiel #6
0
    def get_shows(self, sortby="LastEpisode", only=None):
        # type: (str, Optional[str]) -> Iterator[Dict[str, Any]]
        """Request the all-shows endpoint via ``_request_paged``.

        ``sortby`` must be ``"LastEpisode"``; ``only`` must be ``None`` or
        ``"podcast"``. Both are forwarded to the request.
        """

        assert_choice("sortby", sortby, {"LastEpisode"})
        assert_choice("only", only, {None, "podcast"})
        endpoint = "/v1/media/show/all"
        return self._request_paged(endpoint, 50, sortby=sortby, only=only)
Beispiel #7
0
    def get_shows_mini(self, sortby="LastEpisode", only=None):
        # type: (str, Optional[str]) -> List[Dict[str, Any]]
        """Return minimal information about all shows.

        ``sortby`` must be ``"LastEpisode"``; ``only`` must be ``None`` or
        ``"podcast"``. Both are forwarded to ``_request_single``.
        """

        assert_choice("sortby", sortby, {"LastEpisode"})
        assert_choice("only", only, {None, "podcast"})
        endpoint = "/v1/media/show/preview/mini/all"
        return self._request_single(endpoint, sortby=sortby, only=only)
Beispiel #8
0
    def get_shows_preview(self, sortby="LastEpisode", only=None):
        # type: (str, Optional[str]) -> Iterator[Dict[str, Any]]
        """Return paginated, reduced information about all shows.

        ``sortby`` must be ``"LastEpisode"``; ``only`` must be ``None`` or
        ``"podcast"``. Both are forwarded to ``_request_paged``.
        """

        assert_choice("sortby", sortby, {"LastEpisode"})
        assert_choice("only", only, {None, "podcast"})
        endpoint = "/v1/media/show/preview/all"
        return self._request_paged(endpoint, 50, sortby=sortby, only=only)
Beispiel #9
0
    def get_episodes_by_show_preview(self, show_id, order="ASC"):
        # type: (int, str) -> Iterator[Dict[str, Any]]
        """Return reduced information about all episodes for the given show.

        ``show_id`` is inserted into the request path; ``order`` must be
        ``"ASC"`` or ``"DESC"`` and is forwarded to ``_request_paged``.
        """

        assert_choice("order", order, {"ASC", "DESC"})
        endpoint = "/v1/media/episode/byshow/preview/{}".format(show_id)
        return self._request_paged(endpoint, 50, False, order=order)
Beispiel #10
0
    def get_episodes_by_season(self, season_id, order="ASC"):
        # type: (int, str) -> Iterator[Dict[str, Any]]
        """Return information about all episodes of a given season.

        ``season_id`` is inserted into the request path; ``order`` must be
        ``"ASC"`` or ``"DESC"`` and is forwarded to ``_request_paged``.
        """

        assert_choice("order", order, {"ASC", "DESC"})
        endpoint = "/v1/media/episode/byseason/{}".format(season_id)
        return self._request_paged(endpoint, 50, False, order=order)
Beispiel #11
0
    def get_episodes_by_bohne(self, bohne_id, order="ASC"):
        # type: (int, str) -> Iterator[Dict[str, Any]]
        """Return information about all episodes for the given Bohne.

        ``bohne_id`` is inserted into the request path; ``order`` must be
        ``"ASC"`` or ``"DESC"`` and is forwarded to ``_request_paged``.
        """

        assert_choice("order", order, {"ASC", "DESC"})
        endpoint = "/v1/media/episode/bybohne/{}".format(bohne_id)
        return self._request_paged(endpoint, 50, False, order=order)
Beispiel #12
0
    def get_paths_op(
            self,
            field: str,
            tokens: Sequence[str],
            setop: Callable[..., set[int]],
            scoring: str = "unscored") -> Iterable[OptionalSearchResult]:
        """Lazily produce ``(entry, score)`` pairs for several tokens at once.

        For every token the set of matching document ids is collected from
        ``self.get_docs(field, token)``; ``setop`` is then called with all of
        those sets as positional arguments and its result determines which
        document ids are reported. Each id is resolved via ``self.ids2docs``.

        The score depends on ``scoring``:

        * ``"unscored"``  -- the enumeration index within ``setop``'s result,
        * ``"term_freq"`` -- the per-document sum of term frequencies over
          all tokens,
        * ``"tfidf"``     -- the per-document sum of
          ``self._tfidf(term_freq, inv_doc_freq)`` over all tokens.

        The returned generator is lazy with respect to ``self.ids2docs``
        lookups; the score tables are built eagerly.
        """

        assert_choice("scoring", scoring, SCORING_METHODS)

        id_sets = [set(self.get_docs(field, tok).keys()) for tok in tokens]
        selected = setop(*id_sets)
        paths: Iterable[OptionalSearchResult]

        if scoring == "unscored":
            paths = ((self.ids2docs[key], position)
                     for position, key in enumerate(selected))

        elif scoring == "term_freq":
            freq_totals: DefaultDict[int, int] = defaultdict(int)
            for tok in tokens:
                for key, freq in self.get_docs(field, tok).items():
                    freq_totals[key] += freq
            paths = ((self.ids2docs[key], freq_totals[key])
                     for key in selected)

        elif scoring == "tfidf":
            weights: DefaultDict[int, float] = defaultdict(float)
            for tok in tokens:
                postings = self.get_docs(field, tok)
                if not postings:
                    # No documents for this token: nothing to accumulate.
                    continue
                total = len(self.table[field])
                idf = log10(total / len(postings))
                for key, freq in postings.items():
                    weights[key] += self._tfidf(freq, idf)

            paths = ((self.ids2docs[key], weights[key])
                     for key in selected)

        return paths