Beispiel #1
0
def cldr_to_ftl(langs, config):
    """Fetch ICU locale data files and append their names to FTL files.

    For every entry in `langs` a directory ``ftl_files/<name>/`` is created
    (``<name>`` is the entry without a trailing ``.txt``) and the file
    ``resources.ftl`` inside it is opened for appending.  Entries that end in
    ``.txt`` are downloaded from the ICU source browser; each key/value line
    found inside the selected table is written out as one serialized message.
    Entries without the ``.txt`` suffix only get the (possibly empty) file.

    Args:
        langs: Iterable of file names such as ``"de.txt"``.
        config: ``0`` reads the ``Languages{`` table and emits
            ``language-name-<key>`` messages; ``1`` reads the ``Countries{``
            table and emits ``region-name-<key>`` messages.  Any other value
            leaves the URL stem and search strings empty.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when a download fails, and
        ``UnicodeDecodeError`` when a downloaded file is not valid UTF-8.
    """
    page_url = ""  # stem of address for individual Region or Language pages
    find_str = ""  # searched tag to extract data
    name_str = ""  # stem of name of ftl variables

    if config == 0:
        page_url = "http://icu-project.org/trac/browser/trunk/icu4c/source/data/lang/"
        find_str = "Languages{"
        name_str = "language-name-{}"
    elif config == 1:
        page_url = "http://icu-project.org/trac/browser/trunk/icu4c/source/data/region/"
        find_str = "Countries{"
        name_str = "region-name-{}"

    directory = "ftl_files/"

    for lang in langs:
        is_txt = lang.endswith(".txt")
        # Strip only a real ".txt" suffix.  The previous unanchored regex
        # ".txt" removed any character followed by "txt" anywhere in the name.
        new_directory = lang[:-4] if is_txt else lang
        os.makedirs(directory + new_directory, exist_ok=True)
        new_file = new_directory + "/resources.ftl"

        # The context managers close the file and the HTTP response even when
        # the download or the parsing below raises.
        with open(directory + new_file, "a", encoding="utf-8") as wout:
            if not is_txt:
                continue

            with urllib.request.urlopen(page_url + lang + "?format=txt") as source:
                text = source.read().decode("utf-8", "strict")

            # True while we are inside the table opened by `find_str`.
            catch = False
            for line in text.split("\n"):
                line = line.replace("\ufeff", "")

                # NOTE(review): the pattern below is a character class, so it
                # skips every line containing "(", ")", "/" or "*", not just
                # block-comment delimiters -- confirm that this is intended.
                if (line == ""
                        or line.startswith("//")
                        or re.search(r"[(/**)(**/)]", line)):
                    continue

                line = re.sub(r"\s+", " ", line)
                line = re.sub(r"^\s", "", line)

                has_open = "{" in line
                has_close = "}" in line
                if has_open and not has_close:
                    # A table starts; only the requested one is captured.
                    if find_str in line:
                        catch = True
                elif has_close and not has_open:
                    # Any closing line ends the capture.
                    catch = False
                elif line.count("{") < 2 and line.count("}") < 2:
                    if not catch:
                        continue
                    # A line like `de{"German"}` splits into
                    # ['de', '', 'German', '', ''].
                    parts = re.split(r'[{}"]', line)
                    if len(parts) < 3:
                        # No quoted value on this line; nothing to emit.
                        continue

                    res = ast.Resource()

                    l10n_id = ast.Identifier(name_str.format(parts[0]))
                    value = ast.Pattern([ast.TextElement(parts[2])])
                    msg = ast.Message(l10n_id, value)
                    res.body.append(msg)

                    wout.write(serialize(res))
Beispiel #2
0
    def read_localization_ftl(self, path):
        """Read and parse an existing localization FTL file.

        `path` is joined onto `self.localization_dir` and handed to
        `self.read_ftl_resource`, whose result is returned unchanged.

        A new, empty `FTL.Resource` is returned instead when reading raises
        `IOError` (logged at INFO level: the file will be created) or
        `UnicodeDecodeError` (logged at WARNING level: the file will be
        re-created and translations may be lost).  Other exceptions
        propagate to the caller.
        """
        fullpath = os.path.join(self.localization_dir, path)
        try:
            return self.read_ftl_resource(fullpath)
        except IOError:
            logger = logging.getLogger('migrate')
            logger.info('Localization file {} does not exist and '
                        'it will be created'.format(path))
            return FTL.Resource()
        except UnicodeDecodeError:
            logger = logging.getLogger('migrate')
            # `Logger.warn` is a deprecated alias that newer Python versions
            # no longer provide; `warning` works everywhere.
            logger.warning('Localization file {} has broken encoding. '
                           'It will be re-created and some translations '
                           'may be lost'.format(path))
            return FTL.Resource()
Beispiel #3
0
def merge_resource(ctx, reference, current, transforms, in_changeset):
    """Build a merged FTL resource using `reference` as the template.

    Every entry of `reference.body` is visited in order:

    - comments and section headers are copied as they are;
    - junk is dropped;
    - a message already present in `current.body` is taken from there, so
      repeated merges only ever grow the set of translations;
    - otherwise, if `transforms` holds a transform for the message and
      `in_changeset(id)` is truthy, the transform is evaluated with `ctx`;
    - anything else is left out.

    Returns a new `FTL.Resource` holding the kept entries together with
    `reference.comment`.
    """

    def merge_entry(entry):
        # Standalone comments and section headers are always carried over.
        if isinstance(entry, (FTL.Comment, FTL.Section)):
            return entry

        # Junk never makes it into the result.
        if isinstance(entry, FTL.Junk):
            return None

        message_id = entry.id.name

        # Prefer what the localization already has.
        translated = get_message(current.body, message_id)
        if translated is not None:
            return translated

        # Only migrate messages that have a transform and belong to the
        # changeset being processed.
        transform = get_transform(transforms, message_id)
        if transform is None or not in_changeset(message_id):
            return None

        # A transform without its own comment inherits the reference one.
        if transform.comment is None:
            transform.comment = entry.comment
        return evaluate(ctx, transform)

    merged = []
    for reference_entry in reference.body:
        result = merge_entry(reference_entry)
        if result is not None:
            merged.append(result)

    return FTL.Resource(merged, reference.comment)
Beispiel #4
0
def build_ftl(messages, dtd, data):
    """Serialize `messages` into FTL text, taking the strings from `dtd`.

    `messages` maps a message id to a mapping with two keys: ``'value'``
    (looked up with `get_value_from_dtd` when truthy, otherwise the message
    gets no value) and ``'attrs'`` (attribute name -> lookup key, each
    resolved through `get_value_from_dtd` as well).  `data` is accepted but
    not used here.

    Returns whatever `FluentSerializer().serialize` produces for the
    resulting resource.
    """

    def text_pattern(dtd_key):
        # Wrap the looked-up string in a single-element pattern.
        return ast.Pattern([ast.TextElement(get_value_from_dtd(dtd_key, dtd))])

    resource = ast.Resource()

    for message_id in messages:
        spec = messages[message_id]
        identifier = ast.Identifier(message_id)

        value = text_pattern(spec['value']) if spec['value'] else None

        attributes = []
        for attr_name in spec['attrs']:
            attr_pattern = text_pattern(spec['attrs'][attr_name])
            attributes.append(
                ast.Attribute(ast.Identifier(attr_name), attr_pattern))

        resource.body.append(ast.Message(identifier, value, attributes))

    return FluentSerializer().serialize(resource)
Beispiel #5
0
    def add_transforms(self, target, reference, transforms):
        """Register `transforms` for `target`, templated on `reference`.

        `target` is the destination FTL path relative to the localization
        directory; `reference` is the template FTL path relative to the
        reference directory.

        A transform is an extended FTL node in which some values are
        `Transform` nodes.  They are kept in this lazy form until
        `merge_changeset` evaluates them into real FTL nodes carrying the
        migrated translations.

        `Source` nodes found inside each transform become the dependencies
        of the transformed message.  Every legacy file named by a dependency
        is passed to `maybe_add_localization`; if dependencies exist but no
        localization resource is known afterwards, `EmptyLocalizationError`
        is raised.

        When transforms only copy legacy messages or Fluent patterns,
        `fluent.migrate.helpers.transforms_from` is the recommended way to
        create them.
        """
        def collect_source(found, node):
            if isinstance(node, Source):
                found.add((node.path, node.key))
            return found

        has_reference = self.reference_dir is not None

        if has_reference:
            reference_ast = self.read_reference_ftl(reference)
        else:
            # Without a reference directory, grow a skeleton resource with
            # one entry per transform.
            reference_ast = self.reference_resources.get(target)
            if reference_ast is None:
                reference_ast = FTL.Resource()
            reference_ast.body.extend(
                skeleton(transform) for transform in transforms)
        self.reference_resources[target] = reference_ast

        for transform in transforms:
            message_id = transform.id.name
            # Record every (path, key) pair of the `Source` nodes inside the
            # transform as a dependency of this message.
            self.dependencies[(target, message_id)] = fold(
                collect_source, transform, set())

            # Checking against the reference only makes sense if there is
            # one.
            if not has_reference:
                continue
            if get_message(reference_ast.body, message_id) is not None:
                continue
            # The message is missing from the reference: likely a typo.
            logging.getLogger('migrate').warning(
                '{} "{}" was not found in {}'.format(
                    type(transform).__name__, message_id, reference))

        # Collect the legacy paths named by any dependency and try to load
        # each one.  A single missing legacy file does not have to doom the
        # migration.
        expected_paths = set()
        for sources in self.dependencies.values():
            for legacy_path in {source_path for source_path, _ in sources}:
                expected_paths.add(legacy_path)
                self.maybe_add_localization(legacy_path)

        # If sources were expected but nothing could be loaded, there is
        # nothing to migrate (and hg annotate would fail too): stop early.
        if expected_paths and len(self.localization_resources) == 0:
            error_message = 'No localization files were found'
            logging.getLogger('migrate').error(error_message)
            raise EmptyLocalizationError(error_message)

        # Append to whatever was registered for this target before.
        self.transforms.setdefault(target, []).extend(transforms)

        if target in self.target_resources:
            return
        self.target_resources[target] = self.read_localization_ftl(target)
Beispiel #6
0
    def merge_changeset(self, changeset=None):
        """Yield ``(path, FTL.Resource)`` pairs for the changeset.

        The input data must be configured earlier using the `add_*` methods.
        If given, `changeset` must be a set of (path, key) tuples describing
        which legacy translations are to be merged.  When it is omitted,
        every key of every known non-``.ftl`` localization resource is
        merged.

        This is a generator: for each reference resource whose merged
        snapshot differs from the current localization, it stores the
        snapshot in `self.localization_resources` and yields the resource
        path together with the snapshot.  Paths whose messages did not
        change are skipped.
        """

        if changeset is None:
            # Merge all known legacy translations. Used in tests.
            # `.items()` / `.keys()` work on both Python 2 and Python 3,
            # unlike `iteritems()` / `iterkeys()`.
            changeset = {
                (path, key)
                for path, strings in self.localization_resources.items()
                if not path.endswith('.ftl')
                for key in strings.keys()
            }

        # Iterate over a snapshot of the items: the generator is suspended at
        # every `yield`, and a consumer registering more resources in the
        # meantime must not break the iteration.
        for path, reference in list(self.reference_resources.items()):
            current = self.localization_resources.get(path, FTL.Resource())
            transforms = self.transforms.get(path, [])

            def in_changeset(ident):
                """Check if entity should be merged.

                If at least one dependency of the entity is in the current
                set of changeset, merge it.
                """
                message_deps = self.dependencies.get((path, ident), None)

                # Don't merge if we don't have a transform for this message.
                if message_deps is None:
                    return False

                # As a special case, if a transform exists but has no
                # dependencies, it's a hardcoded `FTL.Node` which doesn't
                # migrate any existing translation but rather creates a new
                # one.  Merge it.
                if len(message_deps) == 0:
                    return True

                # If the intersection of the dependencies and the current
                # changeset is non-empty, merge this message.
                return message_deps & changeset

            # Merge legacy translations with the existing ones using the
            # reference as a template.
            snapshot = merge_resource(
                self, reference, current, transforms, in_changeset
            )

            # Skip this path if the messages in the merged snapshot are
            # identical to those in the current state of the localization file.
            # This may happen when:
            #
            #   - none of the transforms is in the changeset, or
            #   - all messages which would be migrated by the context's
            #     transforms already exist in the current state.
            if self.messages_equal(current, snapshot):
                continue

            # Store the merged snapshot on the context so that the next merge
            # already takes it into account as the existing localization.
            self.localization_resources[path] = snapshot

            # The result for this path is a complete `FTL.Resource`.
            yield path, snapshot
Beispiel #7
0
 def serialize(self, out):
     """Write every unit of this store to `out` as one encoded resource.

     Each unit is converted with `to_entry()`, the entries are wrapped in an
     `ast.Resource`, and the text returned by the module-level `serialize`
     function is encoded with `self.encoding` before being written.
     """
     entries = []
     for unit in self.units:
         entries.append(unit.to_entry())
     resource = ast.Resource(entries)
     text = serialize(resource)
     out.write(text.encode(self.encoding))