Ejemplo n.º 1
0
 def test_persist(self):
     """Persist two modified registers and check that both JSON files change.

     The author of the first chapter of the first lemma is overwritten in
     the registers "I,1" and "III,1". After ``registers.persist()`` the new
     author name must be found in ``I_1.json`` and in ``III_1.json``.
     """
     copy_tst_data("I_1_alpha", "I_1")
     copy_tst_data("III_1_alpha", "III_1")
     registers = Registers()
     for volume_name in ("I,1", "III,1"):
         registers[volume_name]._lemmas[0]._chapters[0]._dict["author"] = "Siegfried"
     registers.persist()
     for file_name in ("I_1.json", "III_1.json"):
         with open(DataRepo.get_data_path().joinpath(file_name), mode="r", encoding="utf-8") as register_file:
             # assertIn shows the searched text and the file content on failure,
             # assertTrue(... in ...) would only report "False is not true".
             self.assertIn("Siegfried", register_file.read())
Ejemplo n.º 2
0
    def test_persist(self):
        copy_tst_data("I_1_two_entries", "I_1")
        register = VolumeRegister(Volumes()["I,1"], Authors())
        register._lemmas[0]._lemma_dict["previous"] = None
        register._lemmas[0]._chapters[0]._dict["author"] = "ÄäÖöÜüß"
        register.persist()
        expect = """[
  {
    "lemma": "Aal",
    "next": "Aarassos",
    "proof_read": 3,
    "short_description": "Ein Fisch",
    "chapters": [
      {
        "start": 1,
        "end": 4,
        "author": "ÄäÖöÜüß"
      }
    ]
  },
  {
    "lemma": "Aarassos",
    "previous": "Aal",
    "proof_read": 2,
    "chapters": [
      {
        "start": 4,
        "end": 4,
        "author": "Abert"
      }
    ]
  }
]"""
        with open(DataRepo.get_data_path().joinpath("I_1.json"), mode="r", encoding="utf-8") as register_file:
            compare(expect, register_file.read())
Ejemplo n.º 3
0
 def test_clean_authors(self):
     """Delete authors without mapping and compare ``authors.json`` with the stub."""
     cleaner = CleanAuthors()
     cleaner.delete_authors_without_mapping()
     expected_path = BASE_PATH.joinpath("test_data/register_stubs/authors_clean_expection.json")
     cleaned_path = DataRepo.get_data_path().joinpath("authors.json")
     # both files are parsed, so the comparison is on JSON content, not on formatting
     with open(expected_path, encoding="utf-8") as expection_file, \
             open(cleaned_path, encoding="utf-8") as cleaned_file:
         compare(json.load(expection_file), json.load(cleaned_file))
Ejemplo n.º 4
0
 def test_remap(self):
     """Run the remap and compare ``authors_mapping.json`` with the stub."""
     author_cleaner = CleanAuthors()
     author_cleaner.remap()
     expected_path = BASE_PATH.joinpath("test_data/register_stubs/authors_mapping_remap_expection.json")
     remapped_path = DataRepo.get_data_path().joinpath("authors_mapping.json")
     # both files are parsed, so the comparison is on JSON content, not on formatting
     with open(expected_path, encoding="utf-8") as expected_file, \
             open(remapped_path, encoding="utf-8") as remapped_file:
         compare(json.load(expected_file), json.load(remapped_file))
Ejemplo n.º 5
0
class VolumeRegister(Register):
    """Register holding the lemmas of a single volume.

    The lemmas are read from ``<volume.file_name>.json`` inside the directory
    returned by ``DataRepo.get_data_path()`` and written back to the same file
    by :meth:`persist`.
    """

    def __init__(self, volume: Volume, authors: Authors):
        """Load every lemma of ``volume`` from its JSON file.

        :param volume: volume whose ``file_name`` selects the JSON file
        :param authors: author collection passed on to every created ``Lemma``
        """
        super().__init__()
        self._authors = authors
        self._volume = volume
        self.repo = DataRepo()
        source_path = self.repo.get_data_path().joinpath(f"{volume.file_name}.json")
        with open(source_path, "r", encoding="utf-8") as json_file:
            raw_lemmas = json.load(json_file)
        self._lemmas.extend(
            Lemma(raw_lemma, self._volume, self._authors) for raw_lemma in raw_lemmas)

    def __repr__(self):
        class_name = self.__class__.__name__
        volume_name = self.volume.name
        lemma_count = len(self.lemmas)
        return f"<{class_name} - volume:{volume_name}, lemmas:{lemma_count}>"

    def __len__(self):
        """Return the number of lemmas in this register."""
        return len(self._lemmas)

    @property
    def volume(self) -> Volume:
        """The volume this register was created for."""
        return self._volume

    @property
    def authors(self) -> Authors:
        """The author collection this register was created with."""
        return self._authors

    @property
    def lemmas(self) -> List[Lemma]:
        """The lemmas of this register in file order."""
        return self._lemmas

    def _get_header(self) -> str:
        """Build the ``{{RERegister ...}}`` template call for this volume."""
        # neighbouring volumes (VG / NF) are looked up by the volume name
        vg, nf = Volumes().get_neighbours(self.volume.name)
        lemma_count = len(self.lemmas)
        # proof_read yields the counters in the order FER, KOR, UNK
        fer, kor, unk = self.proof_read
        fields = [
            "RERegister",
            f"BAND={self.volume.name}",
            f"VG={vg}",
            f"NF={nf}",
            f"SUM={lemma_count}",
            f"UNK={unk}",
            f"KOR={kor}",
            f"FER={fer}",
        ]
        return "{{" + "\n|".join(fields) + "\n}}\n"

    def get_register_str(self) -> str:
        """Return header, table (without volume column) and category link as one text."""
        header = self._get_header()
        table = self._get_table(print_volume=False)
        return f"{header}\n{table}\n[[Kategorie:RE:Register|!]]"

    def persist(self):
        """Write the ``lemma_dict`` of every lemma back to the volume's JSON file."""
        serializable = [lemma.lemma_dict for lemma in self.lemmas]
        target_path = self.repo.get_data_path().joinpath(f"{self._volume.file_name}.json")
        with open(target_path, "w", encoding="utf-8") as json_file:
            # ensure_ascii=False keeps non-ASCII characters readable in the file
            json.dump(serializable, json_file, indent=2, ensure_ascii=False)

    def __getitem__(self, idx: int) -> Lemma:
        """Return the lemma at position ``idx``."""
        return self.lemmas[idx]

    @staticmethod
    def _select_match(candidates, self_supplement: bool):
        """Pick from an iterator of matches: the first one, or the second one
        if ``self_supplement`` is set; ``None`` when there is no such match."""
        if self_supplement:
            # the first hit is skipped, only a second hit counts
            next(candidates, None)
        return next(candidates, None)

    def get_lemma_by_name(self,
                          lemma_name: str,
                          self_supplement: bool = False) -> Optional[Lemma]:
        """Find a lemma whose ``"lemma"`` entry equals ``lemma_name``.

        :param lemma_name: name to search for
        :param self_supplement: if true, return the second lemma with that
            name instead of the first one
        :return: the matching lemma or ``None``
        """
        hits = (entry for entry in self.lemmas if entry["lemma"] == lemma_name)
        return self._select_match(hits, self_supplement)

    def get_lemma_by_sort_key(
            self,
            sort_key: str,
            self_supplement: bool = False) -> Optional[Lemma]:
        """Find a lemma by sort key; ``sort_key`` is normalized before comparing.

        :param sort_key: key to search for, run through ``Lemma.make_sort_key``
        :param self_supplement: if true, return the second lemma with that
            sort key instead of the first one
        :return: the matching lemma or ``None``
        """
        normalized_key = Lemma.make_sort_key(sort_key)
        hits = (entry for entry in self.lemmas if entry.sort_key == normalized_key)
        return self._select_match(hits, self_supplement)

    def get_index_of_lemma(self,
                           lemma_input: Union[str, Lemma],
                           self_supplement: bool = False) -> Optional[int]:
        """Return the position of a lemma given by name or by object.

        :param lemma_input: lemma name (looked up via ``get_lemma_by_name``)
            or a lemma object
        :param self_supplement: forwarded to the name lookup
        :return: index inside ``lemmas`` or ``None`` if nothing usable was found
        """
        if isinstance(lemma_input, str):
            target = self.get_lemma_by_name(lemma_input, self_supplement)
        else:
            target = lemma_input
        if not target:
            return None
        return self.lemmas.index(target)

    def __contains__(self, lemma_name: str) -> bool:
        """Tell whether a lemma with the name ``lemma_name`` is in this register."""
        found = self.get_lemma_by_name(lemma_name)
        return bool(found)

    @staticmethod
    def normalize_sort_key(lemma_dict: LemmaDict) -> str:
        """Return the normalized sort key of ``lemma_dict``.

        An explicit ``"sort_key"`` entry wins; otherwise ``"lemma"`` is used.
        """
        raw_key = lemma_dict["sort_key"] if "sort_key" in lemma_dict else lemma_dict["lemma"]
        return Lemma.make_sort_key(raw_key)
Ejemplo n.º 6
0
class Authors:
    """Collection of authors plus the mapping from register names to authors.

    Both parts are read from ``authors_mapping.json`` and ``authors.json`` in
    the directory returned by ``DataRepo.get_data_path()`` and can be written
    back with :meth:`persist`.
    """

    def __init__(self, update_data=False):
        """Load mapping and authors from the data directory.

        :param update_data: if true, ``data_repo.pull()`` is called before
            the files are read
        """
        self.data_repo = DataRepo()
        if update_data:
            self.data_repo.pull()
        with open(self.data_repo.get_data_path().joinpath(
                "authors_mapping.json"),
                  "r",
                  encoding="utf-8") as json_file:
            self._mapping = json.load(json_file)
        self._authors: Dict[str, Author] = {}
        with open(self.data_repo.get_data_path().joinpath("authors.json"),
                  "r",
                  encoding="utf-8") as json_file:
            for author_key, author_data in json.load(json_file).items():
                self._authors[author_key] = Author(author_key, author_data)

    def __iter__(self) -> Generator[Author, None, None]:
        """Yield all authors that are not redirects, sorted by "last, first" name."""
        for author in sorted(
                self.authors_dict.values(),
                key=lambda item: f"{item.last_name}, {item.first_name}"):
            if not author.redirect:
                yield author

    # NOTE: the cache is keyed on ``self`` as well, so it keeps instances alive
    # while their entries are cached. It is cleared by set_mappings/set_author.
    @lru_cache(maxsize=1000)
    def get_author_by_mapping(self, name: str, issue: str) -> List[Author]:
        """Resolve a register author name to the mapped authors.

        A mapping entry is either an author key, a list of author keys, or a
        dict from issue to one of those; in a dict the key ``"*"`` is the
        fallback for issues without an own entry.

        Any ``KeyError`` during the lookup is suppressed: an unknown name gives
        an empty list, an unknown author key ends the lookup and the authors
        resolved so far are returned.

        The result is cached, so the returned list is shared between calls and
        must not be modified by the caller.

        :param name: author name as used in the register
        :param issue: issue used to select an issue specific mapping
        :return: list of resolved authors, possibly empty
        """
        author_list = []
        with contextlib.suppress(KeyError):
            mapping = self._mapping[name]
            if isinstance(mapping, dict):
                try:
                    mapping = mapping[issue]
                except KeyError:
                    mapping = mapping["*"]
            if isinstance(mapping, str):
                mapping = [mapping]
            # explicit loop on purpose: authors found before a KeyError are kept
            for item in mapping:
                author_list.append(self.get_author(item))
        return author_list

    def get_author(self, author_key: str) -> Author:
        """Return the author for ``author_key``, following one redirect.

        ``|`` characters are removed from the key before the lookup.

        :raises KeyError: if the key or the redirect target is unknown
        """
        author = self._authors[author_key.replace("|", "")]
        if author.redirect:
            author = self._authors[author.redirect]
        return author

    def set_mappings(self, mapping: Dict[str, str]):
        """Add or overwrite mapping entries.

        :param mapping: entries merged into the existing mapping
        """
        self._mapping.update(mapping)
        # cached lookups were computed from the old mapping
        Authors.get_author_by_mapping.cache_clear()

    def set_author(self, mapping: Dict[str, AuthorDict]):
        """Update existing authors and create the ones not known yet.

        :param mapping: author key to author data; known authors are updated
            via ``update_internal_dict``, unknown ones are created
        """
        for author_key, author_data in mapping.items():
            if author_key in self._authors:
                self._authors[author_key].update_internal_dict(author_data)
            else:
                self._authors[author_key] = Author(author_key, author_data)
        # new authors or changed redirects can change cached lookup results
        Authors.get_author_by_mapping.cache_clear()

    def _to_dict(self) -> Dict[str, AuthorDict]:
        """Return ``to_dict()`` of every author, ordered by author key."""
        return {
            dict_key: self._authors[dict_key].to_dict()
            for dict_key in sorted(self._authors)
        }

    def persist(self):
        """Write mapping and authors back to their JSON files."""
        with open(self.data_repo.get_data_path().joinpath(
                "authors_mapping.json"),
                  "w",
                  encoding="utf-8") as json_file:
            json.dump(self._mapping,
                      json_file,
                      sort_keys=True,
                      indent=2,
                      ensure_ascii=False)
        with open(self.data_repo.get_data_path().joinpath("authors.json"),
                  "w",
                  encoding="utf-8") as json_file:
            json.dump(self._to_dict(),
                      json_file,
                      sort_keys=True,
                      indent=2,
                      ensure_ascii=False)

    @property
    def authors_dict(self) -> Dict[str, Author]:
        """The internal dictionary from author key to author."""
        return self._authors

    @property
    def authors_mapping(self):
        """The internal mapping from register names to author keys."""
        return self._mapping