def writePass1txtFiles(self):
    """Concatenate the book sources into aggregate files.

    For both the target language and the English original, this writes:

    * ``single.markdown`` -- all source lines concatenated; useful when
      converting the whole book using the PanDoc utility,
    * ``pass1.txt`` -- the same lines, each prefixed with chapter/line
      info; mostly for debugging, not consumed later.

    The short name of each generated file is appended to
    ``self.log_info``.
    """
    # The same two files are produced for each language tree.
    self._writeConcatenatedSources(self.xx_src_dir, self.xx_aux_dir)
    self._writeConcatenatedSources(self.en_src_dir, self.en_aux_dir)

def _writeConcatenatedSources(self, src_dir, aux_dir):
    """Write ``single.markdown`` and ``pass1.txt`` for one language tree."""
    # Plain concatenation of the sources -- for PanDoc conversion.
    fnameout = os.path.join(aux_dir, 'single.markdown')
    with open(fnameout, 'w', encoding='utf-8', newline='\n') as fout:
        for fname, lineno, line in gen.sourceFileLines(src_dir):
            fout.write(line)
    # Capture the info about the generated file for logging.
    self.log_info.append(self.short_name(fnameout))

    # The same lines, each prefixed with `chapter/line:` -- for debugging.
    # NOTE(review): fname[:2] assumes the relative file name starts with
    # a two-character chapter id -- TODO confirm against the names that
    # gen.sourceFileLines() yields.
    # The explicit newline='\n' keeps the output byte-identical across
    # platforms (the original omitted it for the English pass1.txt only,
    # which was an inconsistency).
    fnameout = os.path.join(aux_dir, 'pass1.txt')
    with open(fnameout, 'w', encoding='utf-8', newline='\n') as fout:
        for fname, lineno, line in gen.sourceFileLines(src_dir):
            fout.write('{}/{}:\t{}'.format(fname[:2], lineno, line))
    self.log_info.append(self.short_name(fnameout))
def loadDoclineLists(self):
    '''Loads document line objects of the source documents to the lists.

    Builds ``self.xx_doclines`` (target language) and ``self.en_doclines``
    (English original) as lists of ``doc.Line`` objects.  Extra sequences
    defined in ``definitions/xx/extra_lines.txt`` (translator notes and
    other parts with no counterpart in the original) are removed from the
    target-language list and reported to ``pass1extra_lines.txt``.  As a
    side effect, the representations of the lines are saved into
    ``pass1doclines.txt`` (mostly for debugging purpose).
    '''
    extras = self._loadExtraDefinitions()

    # Build the list of Line objects from the target-language sources.
    self.xx_doclines = [doc.Line(relname, lineno, line)
                        for relname, lineno, line
                        in gen.sourceFileLines(self.xx_src_dir)]

    # The extra sequences are reported and deleted from the list.
    self._removeExtraSequences(extras)

    # Report the remaining target-language elements.
    fnameout = os.path.join(self.xx_aux_dir, 'pass1doclines.txt')
    self._reportDoclines(self.xx_doclines, fnameout)

    # Build and report the structure of the English original.
    self.en_doclines = [doc.Line(relname, lineno, line)
                        for relname, lineno, line
                        in gen.sourceFileLines(self.en_src_dir)]
    fnameout = os.path.join(self.en_aux_dir, 'pass1doclines.txt')
    self._reportDoclines(self.en_doclines, fnameout)

def _loadExtraDefinitions(self):
    '''Loads the definitions of the extra sequences to a dictionary.

    The target language sources may contain some extra parts used as
    translator notes or some other explanations of the English original.
    When compared with the original, the parts must be skipped.  The
    ``definitions/xx/extra_lines.txt`` stores the definitions of the
    skipped parts in the form that can be cut/pasted from other logs
    (UTF-8).  If the file does not exist, an empty one is created.

    Returns the dictionary where the key is the first line of the extra
    sequence and the value is the list of all lines of the sequence
    (the first line included).

    Raises ValueError when two sequences share the same first line.
    Note: if that happens (say some title of the included sequence),
    just split the extra sequences to one extra sequence for the first
    line, and the two or more sequences of the rest lines (without that
    first line).
    '''
    extras_fname = os.path.join(self.lang_definitions_dir,
                                'extra_lines.txt')

    # Create the empty file if it does not exist.
    if not os.path.isfile(extras_fname):
        with open(extras_fname, 'w', encoding='utf-8'):
            pass

    # Load the content to the `extras` dictionary.  Records in the file
    # are sequences of lines terminated by a separator line that starts
    # with at least five '=' characters.
    extras = {}
    status = 0          # 0 = expecting the first line of a sequence
    lst = None
    with open(extras_fname, encoding='utf-8') as f:
        for line in f:
            if status == 0:
                # First line is the key, the list is the value.
                lst = extras.setdefault(line, [])
                if lst:
                    # An `assert` was used here originally; a real
                    # exception survives `python -O` and gives a message.
                    raise ValueError(
                        'duplicated first line of an extra sequence: '
                        '{!r}'.format(line))
                lst.append(line)    # first line repeated in the list
                status = 1
            elif status == 1:
                # Collect the sequence until the separator.
                if line.startswith('====='):    # 5 at minimum
                    lst = None
                    status = 0
                else:
                    lst.append(line)            # next of the sequence
            else:
                raise NotImplementedError('status = {}\n'.format(status))

    # Capture the info about the input file with the definitions.
    self.log_info.append(self.short_name(extras_fname))
    return extras

def _removeExtraSequences(self, extras):
    '''Deletes the extra sequences from self.xx_doclines, reporting them.

    The skipped sequences are written to ``pass1extra_lines.txt`` in the
    target-language auxiliary directory.
    '''
    xx_extra_fname = os.path.join(self.xx_aux_dir, 'pass1extra_lines.txt')
    with open(xx_extra_fname, 'w', encoding='utf-8') as fout:
        index = 0   # index of the processed element
        # Do not optimize the condition -- the list length can change.
        while index < len(self.xx_doclines):
            docline = self.xx_doclines[index]   # current element
            # Is the current line recognized as a start of an extra
            # sequence?  If so, compare the following source lines in
            # the length of that sequence.
            extra_lines = extras.get(docline.line)
            if extra_lines is not None:
                src_lines = [
                    e.line
                    for e in self.xx_doclines[index:index
                                              + len(extra_lines)]]
                # If the lists have the same content, delete the
                # source elements.
                if src_lines == extra_lines:
                    # Report the skipped lines.
                    fout.write('{}/{}:\n'.format(docline.fname,
                                                 docline.lineno))
                    fout.write(''.join(src_lines))
                    fout.write('====================\n\n')
                    # Delete the lines via deleting their elements.
                    # `index` now addresses the element just after the
                    # deleted sequence -- re-check it without moving on.
                    del self.xx_doclines[index:index + len(extra_lines)]
                    continue
            # Jump to the next checked element.
            index += 1

    # Capture the info about the report file.
    self.log_info.append(self.short_name(xx_extra_fname))

def _reportDoclines(self, doclines, fnameout):
    '''Writes the docline representations to fnameout (for debugging).

    NOTE(review): docline.fname[:2] assumes a two-character chapter id
    prefix -- TODO confirm against gen.sourceFileLines() output.
    '''
    with open(fnameout, 'w', encoding='utf-8') as fout:
        for docline in doclines:
            fout.write('{}/{} {}: {!r}\n'.format(
                docline.fname[:2], docline.lineno,
                docline.type, docline.attrib))
    # Capture the info about the report file.
    self.log_info.append(self.short_name(fnameout))