def __init__(self, update_data=False):
    """Load every available volume register from the data repository.

    :param update_data: when True, pull fresh data from the repo first.
    """
    self.repo = DataRepo()
    if update_data:
        self.repo.pull()
    self._authors: Authors = Authors()
    self._registers: Dict[str, VolumeRegister] = OrderedDict()
    self._alphabetic_registers: Dict[str, AlphabeticRegister] = OrderedDict()
    for current_volume in Volumes().all_volumes:
        try:
            self._registers[current_volume.name] = VolumeRegister(current_volume, self._authors)
        except FileNotFoundError:
            # volumes without a json data file are simply skipped
            continue
def __init__(self, volume: Volume, authors: Authors):
    """Read the volume's lemma json file and build the Lemma objects."""
    super().__init__()
    self._volume = volume
    self._authors = authors
    self.repo = DataRepo()
    source_path = self.repo.get_data_path().joinpath(f"{volume.file_name}.json")
    with open(source_path, "r", encoding="utf-8") as source_file:
        raw_lemmas = json.load(source_file)
    self._lemmas.extend(
        Lemma(raw_lemma, self._volume, self._authors) for raw_lemma in raw_lemmas
    )
def test_persist(self):
    """Edits on loaded registers must end up in the persisted json files."""
    for source_stub, target_name in (("I_1_alpha", "I_1"), ("III_1_alpha", "III_1")):
        copy_tst_data(source_stub, target_name)
    registers = Registers()
    # mutate the first chapter author of both volumes
    for volume_key in ("I,1", "III,1"):
        registers[volume_key]._lemmas[0]._chapters[0]._dict["author"] = "Siegfried"
    registers.persist()
    for file_stem in ("I_1", "III_1"):
        with open(DataRepo.get_data_path().joinpath(f"{file_stem}.json"),
                  mode="r", encoding="utf-8") as register_file:
            self.assertTrue("Siegfried" in register_file.read())
def test_persist(self):
    """persist() writes the mutated lemma state back as pretty-printed json."""
    copy_tst_data("I_1_two_entries", "I_1")
    register = VolumeRegister(Volumes()["I,1"], Authors())
    first_lemma = register._lemmas[0]
    first_lemma._lemma_dict["previous"] = None
    first_lemma._chapters[0]._dict["author"] = "ÄäÖöÜüß"
    register.persist()
    expect = """[
  {
    "lemma": "Aal",
    "next": "Aarassos",
    "proof_read": 3,
    "short_description": "Ein Fisch",
    "chapters": [
      {
        "start": 1,
        "end": 4,
        "author": "ÄäÖöÜüß"
      }
    ]
  },
  {
    "lemma": "Aarassos",
    "previous": "Aal",
    "proof_read": 2,
    "chapters": [
      {
        "start": 4,
        "end": 4,
        "author": "Abert"
      }
    ]
  }
]"""
    with open(DataRepo.get_data_path().joinpath("I_1.json"),
              mode="r", encoding="utf-8") as register_file:
        compare(expect, register_file.read())
def __init__(self, update_data=False):
    """Load author data and the author-name mapping from the data repository.

    :param update_data: when True, pull fresh data from the repo first.
    """
    self.data_repo = DataRepo()
    if update_data:
        self.data_repo.pull()
    with open(self.data_repo.get_data_path().joinpath("authors_mapping.json"),
              "r", encoding="utf-8") as mapping_file:
        self._mapping = json.load(mapping_file)
    with open(self.data_repo.get_data_path().joinpath("authors.json"),
              "r", encoding="utf-8") as authors_file:
        raw_authors = json.load(authors_file)
    self._authors: Dict[str, Author] = {
        author_key: Author(author_key, author_data)
        for author_key, author_data in raw_authors.items()
    }
def test_clean_authors(self):
    """Authors without a mapping entry must be removed from authors.json."""
    cleaner = CleanAuthors()
    cleaner.delete_authors_without_mapping()
    with open(BASE_PATH.joinpath("test_data/register_stubs/authors_clean_expection.json"),
              encoding="utf-8") as expected_file, \
            open(DataRepo.get_data_path().joinpath("authors.json"),
                 encoding="utf-8") as cleaned_file:
        compare(json.load(expected_file), json.load(cleaned_file))
def test_remap(self):
    """remap() must rewrite authors_mapping.json to the expected content."""
    cleaner = CleanAuthors()
    cleaner.remap()
    with open(BASE_PATH.joinpath(
            "test_data/register_stubs/authors_mapping_remap_expection.json"),
            encoding="utf-8") as expected_file:
        with open(DataRepo.get_data_path().joinpath("authors_mapping.json"),
                  encoding="utf-8") as remapped_file:
            compare(json.load(expected_file), json.load(remapped_file))
class VolumeRegister(Register):
    """Register of all lemmas of one RE volume, backed by a json data file."""

    def __init__(self, volume: Volume, authors: Authors):
        super().__init__()
        self._volume = volume
        self._authors = authors
        self.repo = DataRepo()
        data_file = self.repo.get_data_path().joinpath(f"{volume.file_name}.json")
        with open(data_file, "r", encoding="utf-8") as json_file:
            raw_lemmas = json.load(json_file)
        self._lemmas.extend(
            Lemma(raw_lemma, self._volume, self._authors) for raw_lemma in raw_lemmas
        )

    def __repr__(self):
        return f"<{self.__class__.__name__} - volume:{self.volume.name}, lemmas:{len(self.lemmas)}>"

    def __len__(self):
        return len(self._lemmas)

    @property
    def volume(self) -> Volume:
        return self._volume

    @property
    def authors(self) -> Authors:
        return self._authors

    @property
    def lemmas(self) -> List[Lemma]:
        return self._lemmas

    def _get_header(self) -> str:
        """Build the {{RERegister}} template header for the register page."""
        # neighbouring volumes (previous/next issue)
        vg, nf = Volumes().get_neighbours(self.volume.name)
        # proof-read statistics: finished / corrected / unknown
        fer, kor, unk = self.proof_read
        header = [
            "RERegister",
            f"BAND={self.volume.name}",
            f"VG={vg}",
            f"NF={nf}",
            f"SUM={len(self.lemmas)}",
            f"UNK={unk}",
            f"KOR={kor}",
            f"FER={fer}",
        ]
        return "{{" + "\n|".join(header) + "\n}}\n"

    def get_register_str(self) -> str:
        """Return the full wiki text of the register page."""
        return f"{self._get_header()}\n{self._get_table(print_volume=False)}\n[[Kategorie:RE:Register|!]]"

    def persist(self):
        """Write the current lemma state back to the volume's json file."""
        target = self.repo.get_data_path().joinpath(f"{self._volume.file_name}.json")
        with open(target, "w", encoding="utf-8") as json_file:
            json.dump([lemma.lemma_dict for lemma in self.lemmas], json_file,
                      indent=2, ensure_ascii=False)

    def __getitem__(self, idx: int) -> Lemma:
        return self.lemmas[idx]

    def get_lemma_by_name(self, lemma_name: str,
                          self_supplement: bool = False) -> Optional[Lemma]:
        """Find a lemma by its name.

        With ``self_supplement`` the SECOND occurrence is returned (a
        supplement entry inside the same volume); None if absent.
        """
        matched_once = False
        for candidate in self.lemmas:
            if candidate["lemma"] != lemma_name:
                continue
            if matched_once or not self_supplement:
                return candidate
            matched_once = True
        return None

    def get_lemma_by_sort_key(self, sort_key: str,
                              self_supplement: bool = False) -> Optional[Lemma]:
        """Find a lemma by its (normalized) sort key; see get_lemma_by_name."""
        normalized_key = Lemma.make_sort_key(sort_key)
        matched_once = False
        for candidate in self.lemmas:
            if candidate.sort_key != normalized_key:
                continue
            if matched_once or not self_supplement:
                return candidate
            matched_once = True
        return None

    def get_index_of_lemma(self, lemma_input: Union[str, Lemma],
                           self_supplement: bool = False) -> Optional[int]:
        """Return the position of a lemma (given by name or object), or None."""
        if isinstance(lemma_input, str):
            target = self.get_lemma_by_name(lemma_input, self_supplement)
        else:
            target = lemma_input
        return self.lemmas.index(target) if target else None

    def __contains__(self, lemma_name: str) -> bool:
        return bool(self.get_lemma_by_name(lemma_name))

    @staticmethod
    def normalize_sort_key(lemma_dict: LemmaDict) -> str:
        """Derive the normalized sort key, preferring an explicit sort_key."""
        if "sort_key" in lemma_dict:
            raw_key = lemma_dict["sort_key"]
        else:
            raw_key = lemma_dict["lemma"]
        return Lemma.make_sort_key(raw_key)
def tearDownClass(cls):
    """Class-level teardown: remove the test directory and stop data mocking."""
    # renew_path=False: only delete the temporary path, do not recreate it
    clear_tst_path(renew_path=False)
    # switch DataRepo back to non-mocked data
    DataRepo.mock_data(False)
def setUpClass(cls):
    """Class-level setup: route DataRepo to mock data and reset the test path."""
    DataRepo.mock_data(True)
    clear_tst_path()
class Authors:
    """All RE authors plus the register-name -> author mapping.

    Loads ``authors.json`` and ``authors_mapping.json`` from the data
    repository; ``persist()`` writes both files back.
    """

    def __init__(self, update_data=False):
        self.data_repo = DataRepo()
        if update_data:
            self.data_repo.pull()
        with open(self.data_repo.get_data_path().joinpath(
                "authors_mapping.json"), "r", encoding="utf-8") as json_file:
            self._mapping = json.load(json_file)
        self._authors: Dict[str, Author] = {}
        with open(self.data_repo.get_data_path().joinpath("authors.json"),
                  "r", encoding="utf-8") as json_file:
            json_dict = json.load(json_file)
            for author in json_dict:
                self._authors[author] = Author(author, json_dict[author])

    def __iter__(self) -> Generator[Author, None, None]:
        # iterate in "last_name, first_name" order, skipping redirect stubs
        for author in sorted(
                self.authors_dict.values(),
                key=lambda item: f"{item.last_name}, {item.first_name}"):
            if not author.redirect:
                yield author

    # NOTE(review): lru_cache on an instance method keeps every Authors
    # instance alive for the cache's lifetime (ruff B019) — presumably
    # acceptable because the class is used like a singleton; confirm.
    @lru_cache(maxsize=1000)
    def get_author_by_mapping(self, name: str, issue: str) -> List[Author]:
        """Resolve a register author name for a given issue to Author objects.

        Returns an empty list when no mapping entry exists for ``name``.
        """
        author_list = []
        with contextlib.suppress(KeyError):
            mapping = self._mapping[name]
            if isinstance(mapping, dict):
                # issue-specific mapping with "*" as the fallback entry
                try:
                    mapping = mapping[issue]
                except KeyError:
                    mapping = mapping["*"]
            if isinstance(mapping, str):
                mapping = [mapping]
            for item in mapping:
                author_list.append(self.get_author(item))
        return author_list

    def get_author(self, author_key: str) -> Author:
        """Return the author for a key, following one redirect hop.

        :raises KeyError: if the key (or its redirect target) is unknown.
        """
        author = self._authors[author_key.replace("|", "")]
        if author.redirect:
            author = self._authors[author.redirect]
        return author

    def set_mappings(self, mapping: Dict[str, str]):
        """Merge new entries into the author mapping."""
        self._mapping.update(mapping)
        # bugfix: cached get_author_by_mapping results were never invalidated
        # when the mapping changed, so later lookups could return stale data
        type(self).get_author_by_mapping.cache_clear()

    def set_author(self, mapping: Dict[str, AuthorDict]):
        """Update existing authors or create new ones from raw author dicts."""
        for author_key in mapping:
            if author_key in self._authors:
                self._authors[author_key].update_internal_dict(
                    mapping[author_key])
            else:
                self._authors[author_key] = Author(author_key,
                                                   mapping[author_key])
        # bugfix: author data feeds the cached mapping lookups as well
        type(self).get_author_by_mapping.cache_clear()

    def _to_dict(self) -> Dict[str, AuthorDict]:
        """Serialize all authors, keyed and sorted by author key."""
        author_dict = {}
        for dict_key in sorted(self._authors.keys()):
            author_dict[dict_key] = self._authors[dict_key].to_dict()
        return author_dict

    def persist(self):
        """Write mapping and author data back to the data repository."""
        with open(self.data_repo.get_data_path().joinpath(
                "authors_mapping.json"), "w", encoding="utf-8") as json_file:
            json.dump(self._mapping, json_file,
                      sort_keys=True, indent=2, ensure_ascii=False)
        with open(self.data_repo.get_data_path().joinpath("authors.json"),
                  "w", encoding="utf-8") as json_file:
            json.dump(self._to_dict(), json_file,
                      sort_keys=True, indent=2, ensure_ascii=False)

    @property
    def authors_dict(self) -> Dict[str, Author]:
        return self._authors

    @property
    def authors_mapping(self):
        return self._mapping
class Registers:
    """Aggregates all volume registers and offers derived register views
    (alphabetic, per author, short, public domain)."""

    def __init__(self, update_data=False):
        self.repo = DataRepo()
        if update_data:
            self.repo.pull()
        self._authors: Authors = Authors()
        self._registers: Dict[str, VolumeRegister] = OrderedDict()
        self._alphabetic_registers: Dict[str, AlphabeticRegister] = OrderedDict()
        for volume in Volumes().all_volumes:
            # volumes without a json data file are skipped silently
            with contextlib.suppress(FileNotFoundError):
                self._registers[volume.name] = VolumeRegister(
                    volume, self._authors)

    def __getitem__(self, item) -> VolumeRegister:
        return self._registers[item]

    def persist(self):
        """Persist every loaded volume register to its json file."""
        for register in self._registers.values():
            register.persist()

    @property
    def alphabetic(self) -> Generator[AlphabeticRegister, None, None]:
        """Yield one AlphabeticRegister per chunk of RE_ALPHABET."""
        for idx, start in enumerate(RE_ALPHABET):
            end = "zzzzzz"
            after_next_start = None
            # bugfix: RE_ALPHABET[idx - 1] under suppress(IndexError) silently
            # wrapped to the LAST entry for idx == 0 (negative indices never
            # raise IndexError), so the first register got a bogus predecessor;
            # the first chunk must have no predecessor at all.
            before_start = RE_ALPHABET[idx - 1] if idx > 0 else None
            with contextlib.suppress(IndexError):
                end = RE_ALPHABET[idx + 1]
            with contextlib.suppress(IndexError):
                after_next_start = RE_ALPHABET[idx + 2]
            yield AlphabeticRegister(start, end, before_start,
                                     after_next_start, self._registers)

    @property
    def author(self) -> Generator[AuthorRegister, None, None]:
        """Yield an AuthorRegister for every author with at least one lemma."""
        for author in self.authors:
            register = AuthorRegister(author, self.authors, self._registers)
            if len(register) > 0:
                yield register

    @property
    def short(self) -> Generator[ShortRegister, None, None]:
        """Yield a ShortRegister for every main volume."""
        for main_volume in Volumes().main_volumes:
            yield ShortRegister(main_volume, self._registers)

    @property
    def pd(self) -> Generator[PublicDomainRegister, None, None]:
        """Yield public-domain registers for the years around the current one."""
        current_year = datetime.now().year
        for year in range(current_year - 5, current_year + 5):
            yield PublicDomainRegister(year, self._authors, self._registers)

    @property
    def volumes(self) -> Dict[str, VolumeRegister]:
        return self._registers

    @property
    def authors(self) -> Authors:
        return self._authors
def setUpClass(cls):
    """Class-level setup: mock the data repo, reset the test path and
    copy the author fixtures into place."""
    DataRepo.mock_data(True)
    clear_tst_path()
    # fixture stubs for authors.json and authors_mapping.json
    copy_tst_data("authors", "authors")
    copy_tst_data("authors_mapping", "authors_mapping")