def handle(self):
    """
    For each extension version, download its zip archive, extract gettext
    messages from its codelists, schema and documentation, and write POT
    files under ``<output_directory>/<extension id>/<version>/``.

    We simulate pybabel and sphinx-build commands. Variable names are chosen
    to match upstream code.
    """
    output_directory = Path(self.args.output_directory)

    # For sphinx-build, the code path is:
    #
    # * bin/sphinx-build calls main() in sphinx, which calls build_main(),
    #   which calls main() in sphinx.cmdline
    # * main() calls Sphinx(…).build(…) in sphinx.application

    # sphinx-build -E -q …
    kwargs = {
        'confoverrides': {
            'source_suffix': ['.rst', '.md'],
            'source_parsers': {
                '.md': CommonMarkParser,
            },
        },
        'freshenv': True,
        'parallel': 1,
    }
    if not self.args.verbose:
        # Suppress Sphinx's status output (the equivalent of -q).
        kwargs.update(status=None)

    # For pybabel, the code path is:
    #
    # * bin/pybabel calls main() in babel.messages.frontend
    # * main() calls CommandLineInterface().run(sys.argv)
    # * CommandLineInterface() calls extract_messages(), which:
    #   1. Reads the input path and method map from command-line options
    #   2. Instantiates a catalog
    #   3. Calls extract_from_dir() in babel.messages.extract to extract messages
    #   4. extract_from_dir() calls check_and_call_extract_file() to find the
    #      method in the method map
    #   5. check_and_call_extract_file() calls extract_from_file() to open a
    #      file for extraction
    #   6. extract_from_file() calls extract() to extract messages
    #   7. Adds the messages to the catalog
    #   8. Writes a POT file

    # 1. Reads the input path and method map from command-line options
    arguments = [
        # pybabel extract -F babel_ocds_codelist.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)codelists.pot
        ('codelists.pot', [
            ('codelists/*.csv', extract_codelist),
        ]),
        # pybabel extract -F babel_ocds_schema.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)schema.pot
        ('schema.pot', [
            ('*-schema.json', extract_schema),
            ('extension.json', extract_extension_metadata),
        ]),
    ]

    for version in self.versions():
        if not version.download_url:
            logger.warning('No Download URL for {}=={}'.format(
                version.id, version.version))
            # Fix: without a download URL there is nothing to fetch;
            # requests.get(None) below would raise. Skip this version.
            continue

        outdir = output_directory / version.id / version.version
        outdir.mkdir(parents=True, exist_ok=True)

        # See the `files` method of `ExtensionVersion` for similar code.
        response = requests.get(version.download_url, allow_redirects=True)
        response.raise_for_status()
        with closing(ZipFile(BytesIO(response.content))) as zipfile:
            names = zipfile.namelist()
            # The archive's first entry is its top-level directory; strip its
            # length from every other entry to get extension-relative paths.
            start = len(names[0])

            for output_file, method_map in arguments:
                # 2. Instantiates a catalog
                catalog = Catalog()

                # 3. Calls extract_from_dir() in babel.messages.extract to
                #    extract messages
                for name in names[1:]:
                    filename = name[start:]

                    # 4. extract_from_dir() calls check_and_call_extract_file()
                    for pattern, method in method_map:
                        if not pathmatch(pattern, filename):
                            continue

                        # 5. check_and_call_extract_file() calls extract_from_file()
                        with zipfile.open(name) as fileobj:
                            # 6. extract_from_file() calls extract() to extract messages
                            for lineno, message, comments, context in extract(
                                    method, fileobj):
                                # 7. Adds the messages to the catalog
                                catalog.add(message, None, [(filename, lineno)],
                                            auto_comments=comments, context=context)
                        break

                # 8. Writes a POT file
                if catalog:
                    with open(outdir / output_file, 'wb') as outfile:
                        write_po(outfile, catalog)

            with TemporaryDirectory() as srcdir:
                for info in zipfile.infolist()[1:]:
                    filename = info.filename[start:]
                    # Extract files (not directory entries) under docs/, plus
                    # the readme. (Parentheses added for clarity only; they
                    # match the original operator precedence.)
                    if (filename[-1] != '/' and filename.startswith('docs/')) \
                            or filename == 'README.md':
                        info.filename = filename
                        zipfile.extract(info, srcdir)

                with cd(srcdir):
                    # Eliminates a warning, without change to output.
                    with open('contents.rst', 'w') as f:
                        f.write('.. toctree::\n   :glob:\n\n   docs/*\n   README')

                    # sphinx-build -b gettext $(DOCS_DIR) $(POT_DIR)
                    app = Sphinx('.', None, '.', '.', 'gettext', **kwargs)
                    app.build(True)

                    # https://stackoverflow.com/questions/15408348
                    content = subprocess.run(['msgcat', *glob('*.pot')],
                                             check=True,
                                             stdout=subprocess.PIPE).stdout

                # Written after leaving srcdir, so that a relative
                # output_directory resolves against the original working
                # directory.
                with open(outdir / 'docs.pot', 'wb') as f:
                    f.write(content)
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from babel.compat import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')

    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> actual = []
    >>> for message in catalog:
    ...     if message.id:
    ...         actual.append((message.id, message.string))
    ...         actual.append((message.locations, message.flags))
    ...         actual.append((message.user_comments, message.auto_comments))
    >>> expected = [
    ...     ('foo %(name)s', 'quux %(name)s'),
    ...     ([('main.py', 1)], set(['fuzzy', 'python-format'])),
    ...     ([], []),
    ...     (('bar', 'baz'), ('bar', 'baaz')),
    ...     ([('main.py', 3)], set([])),
    ...     (['A user comment'], ['An auto comment'])
    ... ]
    >>> actual == expected
    True

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: a catalog object representing the parsed PO file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Single-element lists let the nested helper functions rebind these values
    # without ``nonlocal`` (this code predates reliance on Python 3).
    counter = [0]
    offset = [0]
    messages = []
    translations = []
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    context = []
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        # Flush the accumulated msgid/msgstr/comment state into the catalog
        # as a single Message, then reset all accumulators.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            # Plural message: pad missing plural forms with empty strings.
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        message = Message(msgid, string, list(locations), set(flags),
                          auto_comments, user_comments, lineno=offset[0] + 1,
                          context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]; del translations[:]; del context[:]; del locations[:];
        del flags[:]; del auto_comments[:]; del user_comments[:];
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Dispatch on the keyword that starts the line; bare quoted strings
        # continue whichever section (msgid/msgstr/msgctxt) is open.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            # Fix: a msgid line closes any open msgctxt section.
            in_msgctxt[0] = False
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            # Fix: a msgstr line closes any open msgctxt section.
            in_msgctxt[0] = False
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Indexed plural form, e.g. msgstr[0] "..."
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Fix: mark the msgctxt section as open so that quoted
            # continuation lines are appended to the context. This flag was
            # previously never set to True anywhere, making the
            # ``elif in_msgctxt[0]`` branch below unreachable and silently
            # dropping multi-line msgctxt values.
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += '\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += '\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, text_type):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                # Location comment, e.g. "#: main.py:1"
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            # Renamed from ``lineno`` to avoid shadowing the
                            # enumerate() loop variable.
                            loc_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], loc_lineno))
            elif line[1:].startswith(','):
                # Flag comment, e.g. "#, fuzzy, python-format"
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                # Obsolete message, e.g. "#~ msgid ..."
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append('')
        translations.append([0, ''])
        _add_message()

    return catalog
def read_mo(fileobj):
    """Parse a binary MO file from the given file-like object into a
    `Catalog` object.

    :param fileobj: the file-like object to read the MO file from

    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in the
           standard library.
    """
    catalog = Catalog()
    headers = {}
    filename = getattr(fileobj, 'name', '')

    data = fileobj.read()
    size = len(data)

    # The file header is five 32-bit words; the magic number determines the
    # byte order used for everything that follows.
    magic = struct.unpack('<I', data[:4])[0]
    if magic == LE_MAGIC:
        version, msgcount, origidx, transidx = struct.unpack('<4I', data[4:20])
        pair_format = '<II'
    elif magic == BE_MAGIC:
        version, msgcount, origidx, transidx = struct.unpack('>4I', data[4:20])
        pair_format = '>II'
    else:
        raise IOError(0, 'Bad magic number', filename)

    # Walk the two parallel (length, offset) seek tables — originals at
    # origidx, translations at transidx — and copy each entry into the
    # catalog.
    for _ in range_type(0, msgcount):
        mlen, moff = struct.unpack(pair_format, data[origidx:origidx + 8])
        tlen, toff = struct.unpack(pair_format, data[transidx:transidx + 8])
        mend = moff + mlen
        tend = toff + tlen
        if not (mend < size and tend < size):
            raise IOError(0, 'File is corrupt', filename)
        msg = data[moff:mend]
        tmsg = data[toff:tend]

        if mlen == 0:
            # An empty msgid carries the catalog metadata: parse the
            # RFC 822-style headers, folding continuation lines into the
            # most recent header value.
            lastkey = key = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if b':' in item:
                    key, value = item.split(b':', 1)
                    lastkey = key = key.strip().lower()
                    headers[key] = value.strip()
                elif lastkey:
                    headers[lastkey] += b'\n' + item

        # A \x04 byte separates an optional context prefix from the message.
        if b'\x04' in msg:
            ctxt, msg = msg.split(b'\x04')
        else:
            ctxt = None

        # A \x00 byte separates singular and plural forms.
        if b'\x00' in msg:
            msg = msg.split(b'\x00')
            tmsg = tmsg.split(b'\x00')
            if catalog.charset:
                msg = [part.decode(catalog.charset) for part in msg]
                tmsg = [part.decode(catalog.charset) for part in tmsg]
        else:
            if catalog.charset:
                msg = msg.decode(catalog.charset)
                tmsg = tmsg.decode(catalog.charset)

        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # advance to next entry in the seek tables
        origidx += 8
        transidx += 8

    catalog.mime_headers = headers.items()
    return catalog