def _parse_proper_source(self, partial_path: str, lang, lookup_section=None) -> Proper:
    """
    Build the `Proper` for `self.proper_id` in the given language.

    The file at `partial_path` is parsed with `_parse_source` and wrapped in a `Proper`.
    If the proper has a `vide` rule, the file it points to is parsed as well and merged in.
    The `Comment` section is then removed and its parsed content, together with the
    translated title and supplements, is stored directly on the `Proper`. Finally prefaces
    are added, sections are filtered and amended, and section titles are translated.

    :param partial_path: path of the source file, passed on to `_parse_source`
    :param lang: language used for file lookup and for translations
    :param lookup_section: optional section name passed on to `_parse_source`
    :return: the assembled `Proper`
    :raises ProperNotFound: when the file referenced by `vide` cannot be located or when
                            no title can be looked up for `self.proper_id`
    """
    source: ParsedSource = self._parse_source(partial_path, lang, lookup_section)
    proper = Proper(self.proper_id, lang, source)

    # A `vide` rule points to another file whose sections are merged into this proper.
    # NOTE(review): the merge is expected to keep sections already defined here - confirm in `merge`.
    vide = proper.get_rule('vide')
    if vide:
        if '/' in vide:
            # The rule already names the directory
            candidates = [f'{vide}.txt']
        else:
            # Bare name - try the known directories in order
            candidates = [f'{subdir}/{vide}.txt' for subdir in ('Commune', 'Tempora')]

        nested_path = None
        for candidate in candidates:
            nested_path = self._get_full_path(candidate, lang)
            if nested_path:
                break
        if not nested_path:
            raise ProperNotFound(f'Proper from vide not found {vide}.')
        proper.merge(self._parse_source(nested_path, lang=lang))

    # Content of the "Comment" section becomes direct properties of the Proper object
    comment: dict = self._parse_comment(proper.pop_section('Comment'))

    try:
        proper.title = self.translations[lang].TITLES[self.proper_id]
    except KeyError:
        # Very rare case: the source of the proper exists but the rank or color in the ID is invalid
        raise ProperNotFound(f"Proper {self.proper_id} not found")

    proper.description = comment['description']
    proper.additional_info = comment['additional_info']
    proper.supplements = self.translations[lang].SUPPLEMENTS.get(self.proper_id, [])

    # Post-processing steps, applied in this fixed order
    proper = self._add_prefaces(proper, lang)
    proper = self._filter_sections(proper)
    proper = self._amend_sections_contents(proper)
    return self._translate_section_titles(proper, lang)
def parse(self) -> Tuple[Proper, Proper]:
    """
    Parse the proper in the configured language and in Latin.

    Translations and the prefaces file (`Ordo/Prefationes.txt`) are loaded for both
    languages first; then the source located by `_get_partial_path` is parsed once
    per language.

    :return: tuple `(vernacular proper, Latin proper)`
    :raises ProperNotFound: when parsing the proper fails with `FileNotFoundError`
    """
    languages = (self.lang, LANGUAGE_LATIN)

    for language in languages:
        self.translations[language] = TRANSLATION[language]
    for language in languages:
        self.prefaces[language] = self._parse_source('Ordo/Prefationes.txt', language)

    partial_path = self._get_partial_path()
    try:
        vernacular: Proper = self._parse_proper_source(partial_path, self.lang)
        latin: Proper = self._parse_proper_source(partial_path, LANGUAGE_LATIN)
    except FileNotFoundError as e:
        # Report a missing file with the same exception type as other lookup failures
        raise ProperNotFound(f'Proper `{e.filename}` not found.')
    return vernacular, latin
def __init__(self, id_: str, lang: str, parsed_source: ParsedSource = None) -> None:
    """
    Create a proper identified by `id_` in the language `lang`.

    `id_` must consist of exactly four colon-separated fields. The third one is
    converted to an integer and stored as `rank`; the fourth one is split into single
    characters and stored as `colors`.

    :param id_: identifier of the proper
    :param lang: language of the proper
    :param parsed_source: optional source whose `_container` is copied into this object
    :raises ProperNotFound: when `id_` does not have four fields or the rank is not an integer
    """
    super(Proper, self).__init__()
    self.id = id_
    self.lang = lang

    try:
        _, _, rank_field, color_field = id_.split(':')
        self.rank = int(rank_field)
    except ValueError:
        # Raised both by a wrong number of fields and by a non-numeric rank
        raise ProperNotFound(f"Proper {id_} not found")
    self.colors = list(color_field)

    if parsed_source is None:
        return
    # Work on a copy so that the container of `parsed_source` is not shared with this object
    self._container = copy(parsed_source._container)
def _parse_source(self, partial_path: str, lang, lookup_section=None) -> ParsedSource:
    """
    Read a source file and collect its content into a `ParsedSource`.

    A line matching `SECTION_REGEX` opens a new section; the following lines are added
    to the body of that section. Lines matching `REFERENCE_REGEX`
    (like `@Sancti/02-02:Evangelium`) are resolved:

    * a reference found before any section merges the whole referenced file,
    * a reference with a path pulls the named section from the referenced file,
    * a reference without a path pulls the named section from the current file.

    A regular line ending with `~` is continued by the next regular line.

    :param partial_path: path of the file, resolved with `_get_full_path`
    :param lang: language used for path lookup and for line normalization
    :param lookup_section: when set, only lines of the section with this name are stored
                           (whole-file references before the first section are still merged)
    :return: parsed source after newline stripping and conditionals resolution
    :raises ProperNotFound: when this file or a referenced file cannot be located
    """
    result: ParsedSource = ParsedSource()
    current_section: str = None
    continuation_pending: bool = False

    full_path: str = self._get_full_path(partial_path, lang)
    if not full_path:
        raise ProperNotFound(f'Proper `{partial_path}` not found.')

    with open(full_path) as fh:
        for raw_line in fh:
            line = raw_line.strip()

            # Empty lines at the beginning of the file are skipped
            if current_section is None and line == '':
                continue

            # Lines containing an exclamation mark only are skipped
            if line == '!':
                continue

            if current_section is None and REFERENCE_REGEX.match(line):
                # Reference outside any section - load all sections from the referenced
                # file and continue with the sections from the current one.
                path_bit, _, _ = REFERENCE_REGEX.findall(line)[0]
                if path_bit:
                    nested_path = self._get_full_path(f'{path_bit}.txt', lang)
                else:
                    nested_path = partial_path
                if not nested_path:
                    raise ProperNotFound(f'Proper `{path_bit}.txt` not found.')
                result.merge(self._parse_source(nested_path, lang=lang))
                continue

            line = self._normalize(line, lang)

            if re.search(SECTION_REGEX, line):
                current_section = re.sub(SECTION_REGEX, '\\1', line)

            # With `lookup_section` set, lines of all other sections are ignored
            if lookup_section and lookup_section != current_section:
                continue

            if re.match(SECTION_REGEX, line):
                # Section header - start the section with an empty body
                result.set_section(current_section, Section(current_section))
                continue

            if REFERENCE_REGEX.match(line):
                path_bit, nested_section_name, _ = REFERENCE_REGEX.findall(line)[0]

                if not path_bit:
                    # Reference to another section of the current file
                    nested_body = result.get_section(nested_section_name).body
                    result.get_section(current_section).extend_body(nested_body)
                    continue

                # Reference to an external file - parse it recursively, limited to the wanted section
                nested_path = self._get_full_path(f'{path_bit}.txt', lang)
                if not nested_path:
                    raise ProperNotFound(f'Proper `{path_bit}.txt` not found.')
                nested_source = self._parse_source(
                    nested_path, lang=lang, lookup_section=nested_section_name)
                nested_section = nested_source.get_section(nested_section_name)
                if nested_section is None:
                    log.warning("Section `%s` referenced from `%s` is missing in `%s`",
                                nested_section_name, full_path, nested_path)
                else:
                    result.get_section(current_section).extend_body(nested_section.body)
                continue

            # A regular line. Every `~` is replaced with a space; a trailing `~` additionally
            # marks that the next regular line is a continuation of this one.
            text: str = line.replace('~', ' ')
            if current_section not in result.keys():
                result.set_section(current_section, Section(current_section))
            section = result.get_section(current_section)
            if continuation_pending:
                section.body[-1] += text
            else:
                section.append_to_body(text)
            continuation_pending = line.endswith('~')

    result = self._strip_newlines(result)
    result = self._resolve_conditionals(result)
    return result