Esempio n. 1
0
    def handle(self):
        """Generate POT files for every selected extension version.

        For each version yielded by ``self.versions()``, the version's ZIP archive is downloaded and the following
        files are written to ``<output_directory>/<version.id>/<version.version>/``:

        * ``codelists.pot`` and ``schema.pot``: messages extracted from the archive's codelist CSV files, schema
          JSON files and ``extension.json`` (each file is written only if its catalog is truthy)
        * ``docs.pot``: messages extracted by Sphinx's ``gettext`` builder from ``README.md`` and the files under
          ``docs/``, concatenated with ``msgcat``

        Versions without a download URL are skipped with a warning, since there is nothing to download.

        Raises whatever ``requests`` raises for a failed download (``raise_for_status()``) and
        ``subprocess.CalledProcessError`` if ``msgcat`` fails.
        """
        output_directory = Path(self.args.output_directory)

        # We simulate pybabel and sphinx-build commands. Variable names are chosen to match upstream code.

        # For sphinx-build, the code path is:
        #
        # * bin/sphinx-build calls main() in sphinx, which calls build_main(), which calls main() in sphinx.cmdline
        # * main() calls Sphinx(…).build(…) in sphinx.application

        # sphinx-build -E -q …
        kwargs = {
            'confoverrides': {
                'source_suffix': ['.rst', '.md'],
                'source_parsers': {
                    '.md': CommonMarkParser,
                },
            },
            'freshenv': True,
            'parallel': 1,
        }
        if not self.args.verbose:
            kwargs.update(status=None)

        # For pybabel, the code path is:
        #
        # * bin/pybabel calls main() in babel.messages.frontend
        # * main() calls CommandLineInterface().run(sys.argv)
        # * CommandLineInterface() calls extract_messages(), which:
        #   1. Reads the input path and method map from command-line options
        #   2. Instantiates a catalog
        #   3. Calls extract_from_dir() in babel.messages.extract to extract messages
        #   4. extract_from_dir() calls check_and_call_extract_file() to find the method in the method map
        #   5. check_and_call_extract_file() calls extract_from_file() to open a file for extraction
        #   6. extract_from_file() calls extract() to extract messages
        #   7. Adds the messages to the catalog
        #   8. Writes a POT file

        # 1. Reads the input path and method map from command-line options
        arguments = [
            # pybabel extract -F babel_ocds_codelist.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)codelists.pot
            ('codelists.pot', [
                ('codelists/*.csv', extract_codelist),
            ]),
            # pybabel extract -F babel_ocds_schema.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)schema.pot
            ('schema.pot', [
                ('*-schema.json', extract_schema),
                ('extension.json', extract_extension_metadata),
            ]),
        ]

        for version in self.versions():
            if not version.download_url:
                logger.warning('No Download URL for %s==%s', version.id, version.version)
                # Without a URL there is nothing to fetch; skip before creating an empty output directory.
                continue

            outdir = output_directory / version.id / version.version

            outdir.mkdir(parents=True, exist_ok=True)

            # See the `files` method of `ExtensionVersion` for similar code.
            response = requests.get(version.download_url, allow_redirects=True)
            response.raise_for_status()
            with closing(ZipFile(BytesIO(response.content))) as zipfile:
                names = zipfile.namelist()
                # The first entry is treated as the archive's root directory; its length is the prefix to strip
                # from every other entry name.
                start = len(names[0])

                for output_file, method_map in arguments:
                    # 2. Instantiates a catalog
                    catalog = Catalog()

                    # 3. Calls extract_from_dir() in babel.messages.extract to extract messages
                    for name in names[1:]:
                        filename = name[start:]

                        # 4. extract_from_dir() calls check_and_call_extract_file()
                        for pattern, method in method_map:
                            if not pathmatch(pattern, filename):
                                continue

                            # 5. check_and_call_extract_file() calls extract_from_file()
                            with zipfile.open(name) as fileobj:
                                # 6. extract_from_file() calls extract() to extract messages
                                for lineno, message, comments, context in extract(method, fileobj):
                                    # 7. Adds the messages to the catalog
                                    catalog.add(message,
                                                None, [(filename, lineno)],
                                                auto_comments=comments,
                                                context=context)

                            # Only the first matching pattern is applied to a file.
                            break

                    # 8. Writes a POT file
                    if catalog:
                        with open(outdir / output_file, 'wb') as outfile:
                            write_po(outfile, catalog)

                with TemporaryDirectory() as srcdir:
                    for info in zipfile.infolist()[1:]:
                        filename = info.filename[start:]
                        # Extract regular files under docs/ (not directory entries), plus the top-level README.
                        is_docs_file = filename.startswith('docs/') and not filename.endswith('/')
                        if is_docs_file or filename == 'README.md':
                            # Rewrite the member name so that it is extracted without the root directory prefix.
                            info.filename = filename
                            zipfile.extract(info, srcdir)

                    with cd(srcdir):
                        # Eliminates a warning, without change to output.
                        with open('contents.rst', 'w') as f:
                            f.write('.. toctree::\n   :glob:\n\n   docs/*\n   README')

                        # sphinx-build -b gettext $(DOCS_DIR) $(POT_DIR)
                        app = Sphinx('.', None, '.', '.', 'gettext', **kwargs)
                        app.build(True)

                        # https://stackoverflow.com/questions/15408348
                        content = subprocess.run(['msgcat', *glob('*.pot')],
                                                 check=True,
                                                 stdout=subprocess.PIPE).stdout

                # Written after the temporary directory is removed; `content` already holds the merged catalog.
                with open(outdir / 'docs.pot', 'wb') as f:
                    f.write(content)
Esempio n. 2
0
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from babel.compat import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> actual = []
    >>> for message in catalog:
    ...     if message.id:
    ...         actual.append((message.id, message.string))
    ...         actual.append((message.locations, message.flags))
    ...         actual.append((message.user_comments, message.auto_comments))
    >>> expected = [
    ...   ('foo %(name)s', 'quux %(name)s'),
    ...   ([('main.py', 1)], set(['fuzzy', 'python-format'])),
    ...   ([], []),
    ...   (('bar', 'baz'), ('bar', 'baaz')),
    ...   ([('main.py', 3)], set([])),
    ...   (['A user comment'], ['An auto comment'])
    ... ]
    >>> actual == expected
    True

    Continuation lines (lines starting with ``"``) are appended to whichever
    of ``msgid``/``msgid_plural``, ``msgstr`` or ``msgctxt`` was seen last.

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: a catalog object representing the parsed PO file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state. Scalars are wrapped in one-element lists so that the
    # nested helper functions can rebind them without ``nonlocal``.
    counter = [0]
    offset = [0]
    messages = []
    translations = []
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    context = []
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        """Build a `Message` from the accumulated state, store it in the
        catalog, and reset the state for the next message."""
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            # Plural message: pad missing plural forms with empty strings.
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        # Pass copies: the accumulator lists are cleared below and reused
        # for the next message.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1, context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del context[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        """Handle one keyword or continuation line of a message entry."""
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = in_msgctxt[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Remember that we are inside a msgctxt so that its continuation
            # lines are collected instead of being dropped.
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += '\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += '\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, text_type):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
            # A comment after a complete entry starts the next entry.
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append('')
        translations.append([0, ''])
        _add_message()

    return catalog
Esempio n. 3
0
def read_mo(fileobj):
    """Read a binary MO file from the given file-like object and return a
    corresponding `Catalog` object.

    Message ids, translations and message contexts are decoded with
    ``catalog.charset`` when it is set; otherwise they are left as bytes.
    The headers found in the metadata entry (the entry with an empty message
    id) are stored, undecoded, in ``catalog.mime_headers``.

    :param fileobj: the file-like object to read the MO file from
    :return: a catalog object representing the parsed MO file
    :rtype: `Catalog`
    :raise IOError: if the magic number is not recognised, or if a message
                    or translation extends past the end of the file

    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in the
           standard library.
    """
    catalog = Catalog()
    headers = {}

    filename = getattr(fileobj, 'name', '')

    buf = fileobj.read()
    buflen = len(buf)
    unpack = struct.unpack

    # Parse the .mo file header, which consists of 5 little endian 32
    # bit words.
    magic = unpack('<I', buf[:4])[0]  # Are we big endian or little endian?
    if magic == LE_MAGIC:
        version, msgcount, origidx, transidx = unpack('<4I', buf[4:20])
        ii = '<II'
    elif magic == BE_MAGIC:
        version, msgcount, origidx, transidx = unpack('>4I', buf[4:20])
        ii = '>II'
    else:
        raise IOError(0, 'Bad magic number', filename)

    # Now put all messages from the .mo file buffer into the catalog
    # dictionary
    for i in range_type(0, msgcount):
        # Each table entry is a (length, offset) pair.
        mlen, moff = unpack(ii, buf[origidx:origidx + 8])
        mend = moff + mlen
        tlen, toff = unpack(ii, buf[transidx:transidx + 8])
        tend = toff + tlen
        if mend < buflen and tend < buflen:
            msg = buf[moff:mend]
            tmsg = buf[toff:tend]
        else:
            raise IOError(0, 'File is corrupt', filename)

        # See if we're looking at GNU .mo conventions for metadata
        if mlen == 0:
            # Catalog description
            lastkey = key = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if b':' in item:
                    key, value = item.split(b':', 1)
                    lastkey = key = key.strip().lower()
                    headers[key] = value.strip()
                elif lastkey:
                    # A line without a colon continues the previous header.
                    headers[lastkey] += b'\n' + item

        if b'\x04' in msg:  # context
            # Split on the first separator only, so that a stray separator
            # in the message id cannot break the unpacking.
            ctxt, msg = msg.split(b'\x04', 1)
            # Decode the context the same way as the message id and the
            # translation below.
            if catalog.charset:
                ctxt = ctxt.decode(catalog.charset)
        else:
            ctxt = None

        if b'\x00' in msg:  # plural forms
            msg = msg.split(b'\x00')
            tmsg = tmsg.split(b'\x00')
            if catalog.charset:
                msg = [x.decode(catalog.charset) for x in msg]
                tmsg = [x.decode(catalog.charset) for x in tmsg]
        else:
            if catalog.charset:
                msg = msg.decode(catalog.charset)
                tmsg = tmsg.decode(catalog.charset)
        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # advance to next entry in the seek tables
        origidx += 8
        transidx += 8

    catalog.mime_headers = headers.items()
    return catalog