def cldr_to_ftl(langs, config):
    """Scrape CLDR language or region names from the ICU repository and write them as FTL files."""
    page_url = ""  # stem of address for individual Region or Language pages
    find_str = ""  # searched tag to extract data
    name_str = ""  # stem of name of ftl variables
    if config == 0:
        page_url = "http://icu-project.org/trac/browser/trunk/icu4c/source/data/lang/"
        find_str = "Languages{"
        name_str = "language-name-{}"
    elif config == 1:
        page_url = "http://icu-project.org/trac/browser/trunk/icu4c/source/data/region/"
        find_str = "Countries{"
        name_str = "region-name-{}"
    directory = "ftl_files/"
    for lang in langs:
        new_directory = re.sub(r"\.txt", "", lang)
        if not os.path.exists(directory + new_directory):
            os.makedirs(directory + new_directory)
        new_file = new_directory + "/resources.ftl"
        wout = open(directory + new_file, "a")
        if lang[-4:] == ".txt":
            source = urllib.request.urlopen(page_url + lang + "?format=txt")
            text = source.read().decode("utf-8", "strict")
            text_lines = text.split("\n")
            interior = 0
            catch = False
            for line in text_lines:
                # Strip the BOM, then skip `//` comment lines, lines containing
                # any of ( ) / * (block-comment characters), and empty lines.
                line = re.sub("\ufeff", "", line)
                if (not re.search("^//", line)
                        and not re.search(r"[(/**)(**/)]", line)
                        and line != ''):
                    # Collapse runs of whitespace and strip the leading space.
                    line = re.sub(r"\s+", " ", line)
                    line = re.sub(r"^\s", "", line)
                    if "{" in line and "}" not in line:
                        # Entering a nested block; start capturing once inside
                        # the Languages{ / Countries{ table.
                        interior += 1
                        if find_str in line:
                            catch = True
                    elif "}" in line and "{" not in line:
                        # Leaving a nested block.
                        interior -= 1
                        catch = False
                    elif (len(re.findall("{", line)) < 2
                            and len(re.findall("}", line)) < 2):
                        if catch:
                            # A data line such as: en{"English"}
                            parts = re.split(r"[\{\}\"]", line)
                            res = ast.Resource()
                            l10n_id = ast.Identifier(name_str.format(parts[0]))
                            value = ast.Pattern([ast.TextElement(parts[2])])
                            msg = ast.Message(l10n_id, value)
                            res.body.append(msg)
                            s = serialize(res)
                            wout.write(s)
        wout.close()

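# Hedged usage sketch (not part of the original source): cldr_to_ftl assumes
# module-level imports of re, os, urllib.request and fluent.syntax (ast,
# serialize). For an ICU lang file line such as `en{"English"}`, config 0
# produces the FTL message `language-name-en = English` under
# ftl_files/en/resources.ftl.
def example_cldr_to_ftl():
    # import os, re, urllib.request
    # from fluent.syntax import ast, serialize
    cldr_to_ftl(["en.txt", "fr.txt"], config=0)  # language display names
    cldr_to_ftl(["en.txt", "fr.txt"], config=1)  # region names
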
def read_localization_ftl(self, path):
    """Read and parse an existing localization FTL file.

    Create a new FTL.Resource if the file doesn't exist or can't be
    decoded.
    """
    fullpath = os.path.join(self.localization_dir, path)
    try:
        return self.read_ftl_resource(fullpath)
    except IOError:
        logger = logging.getLogger('migrate')
        logger.info(
            'Localization file {} does not exist and '
            'it will be created'.format(path))
        return FTL.Resource()
    except UnicodeDecodeError:
        logger = logging.getLogger('migrate')
        logger.warning(
            'Localization file {} has broken encoding. '
            'It will be re-created and some translations '
            'may be lost'.format(path))
        return FTL.Resource()

def merge_resource(ctx, reference, current, transforms, in_changeset):
    """Transform legacy translations into FTL.

    Use the `reference` FTL AST as a template. For each en-US string in the
    reference, first check if it's in the currently processed changeset with
    `in_changeset`; then check for an existing translation in the current
    FTL `localization` or for a migration specification in `transforms`.
    """

    def merge_body(body):
        return [
            entry
            for entry in map(merge_entry, body)
            if entry is not None
        ]

    def merge_entry(entry):
        # All standalone comments will be merged.
        if isinstance(entry, FTL.Comment):
            return entry

        # All section headers will be merged.
        if isinstance(entry, FTL.Section):
            return entry

        # Ignore Junk
        if isinstance(entry, FTL.Junk):
            return None

        ident = entry.id.name

        # If the message is present in the existing localization, we add it to
        # the resulting resource. This ensures consecutive merges don't remove
        # translations but rather create supersets of them.
        existing = get_message(current.body, ident)
        if existing is not None:
            return existing

        transform = get_transform(transforms, ident)

        # Make sure this message is supposed to be migrated as part of the
        # current changeset.
        if transform is not None and in_changeset(ident):
            if transform.comment is None:
                transform.comment = entry.comment
            return evaluate(ctx, transform)

    body = merge_body(reference.body)
    return FTL.Resource(body, reference.comment)

def build_ftl(messages, dtd, data):
    """Build a serialized FTL resource from a message map and DTD values."""
    res = ast.Resource()
    for id_str in messages:
        msg = messages[id_str]
        l10n_id = ast.Identifier(id_str)
        val = None
        attrs = []
        if msg['value']:
            dtd_val = get_value_from_dtd(msg['value'], dtd)
            val = ast.Pattern([ast.TextElement(dtd_val)])
        for attr_name in msg['attrs']:
            dtd_val = get_value_from_dtd(msg['attrs'][attr_name], dtd)
            attr_val = ast.Pattern([ast.TextElement(dtd_val)])
            attrs.append(ast.Attribute(ast.Identifier(attr_name), attr_val))
        m = ast.Message(l10n_id, val, attrs)
        res.body.append(m)

    serializer = FluentSerializer()
    return serializer.serialize(res)

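# Hedged sketch of the inputs build_ftl appears to expect (the concrete ids
# and entity names below are invented for illustration, not taken from the
# original source): `messages` maps FTL message ids to a dict with a 'value'
# DTD entity name and an 'attrs' mapping of FTL attribute names to DTD entity
# names; `dtd` is whatever get_value_from_dtd looks entities up in.
def example_build_ftl(dtd):
    messages = {
        'settings-title': {
            'value': 'settingsTitle',
            'attrs': {},
        },
        'cancel-button': {
            'value': None,
            'attrs': {'label': 'cancelButton.label'},
        },
    }
    return build_ftl(messages, dtd, data=None)
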
def add_transforms(self, target, reference, transforms):
    """Define transforms for target using reference as template.

    `target` is a path of the destination FTL file relative to the
    localization directory. `reference` is a path to the template FTL file
    relative to the reference directory.

    Each transform is an extended FTL node with `Transform` nodes as some
    values. Transforms are stored in their lazy AST form until
    `merge_changeset` is called, at which point they are evaluated to real
    FTL nodes with migrated translations.

    Each transform is scanned for `Source` nodes which will be used to build
    the list of dependencies for the transformed message.

    For transforms that merely copy legacy messages or Fluent patterns,
    using `fluent.migrate.helpers.transforms_from` is recommended.
    """
    def get_sources(acc, cur):
        if isinstance(cur, Source):
            acc.add((cur.path, cur.key))
        return acc

    if self.reference_dir is None:
        # Add skeletons to resource body for each transform
        # if there's no reference.
        reference_ast = self.reference_resources.get(target)
        if reference_ast is None:
            reference_ast = FTL.Resource()
        reference_ast.body.extend(
            skeleton(transform) for transform in transforms)
    else:
        reference_ast = self.read_reference_ftl(reference)
    self.reference_resources[target] = reference_ast

    for node in transforms:
        ident = node.id.name
        # Scan `node` for `Source` nodes and collect the information they
        # store into a set of dependencies.
        dependencies = fold(get_sources, node, set())
        # Set these sources as dependencies for the current transform.
        self.dependencies[(target, ident)] = dependencies

        # The target Fluent message should exist in the reference file. If
        # it doesn't, it's probably a typo. This check only applies when a
        # reference directory is available.
        if self.reference_dir is None:
            continue
        if get_message(reference_ast.body, ident) is None:
            logger = logging.getLogger('migrate')
            logger.warning('{} "{}" was not found in {}'.format(
                type(node).__name__, ident, reference))

    # Keep track of localization resource paths which were defined as
    # sources in the transforms.
    expected_paths = set()

    # Read all legacy translation files defined in Source transforms. This
    # may fail but a single missing legacy resource doesn't mean that the
    # migration can't succeed.
    for dependencies in self.dependencies.values():
        for path in set(path for path, _ in dependencies):
            expected_paths.add(path)
            self.maybe_add_localization(path)

    # However, if all legacy resources are missing, bail out early. There
    # are no translations to migrate. We'd also get errors in hg annotate.
    if len(expected_paths) > 0 and len(self.localization_resources) == 0:
        error_message = 'No localization files were found'
        logging.getLogger('migrate').error(error_message)
        raise EmptyLocalizationError(error_message)

    # Add the current transforms to any other transforms added earlier for
    # this path.
    path_transforms = self.transforms.setdefault(target, [])
    path_transforms += transforms

    if target not in self.target_resources:
        target_ast = self.read_localization_ftl(target)
        self.target_resources[target] = target_ast

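# Hedged usage sketch (the file paths, legacy keys and message ids below are
# invented for illustration): transforms are typically built with
# fluent.migrate.helpers.transforms_from, which parses an FTL-like template
# whose COPY() placeholders name the legacy DTD/properties entries to migrate.
def example_add_transforms(ctx):
    from fluent.migrate.helpers import transforms_from
    ctx.add_transforms(
        'browser/menu.ftl',
        'browser/menu.ftl',
        transforms_from("""
menu-quit = { COPY("browser/menu.dtd", "quitMenuItem.label") }
"""))
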
def merge_changeset(self, changeset=None):
    """Return a generator of FTL ASTs for the changeset.

    The input data must be configured earlier using the `add_*` methods.
    If given, `changeset` must be a set of (path, key) tuples describing
    which legacy translations are to be merged.

    Given `changeset`, yield (path, `FTL.Resource`) pairs, one per resource
    path. The yielded resources are also used to update this context's
    existing localization resources.
    """
    if changeset is None:
        # Merge all known legacy translations. Used in tests.
        changeset = {
            (path, key)
            for path, strings in self.localization_resources.items()
            if not path.endswith('.ftl')
            for key in strings.keys()
        }

    for path, reference in self.reference_resources.items():
        current = self.localization_resources.get(path, FTL.Resource())
        transforms = self.transforms.get(path, [])

        def in_changeset(ident):
            """Check if entity should be merged.

            If at least one dependency of the entity is in the current
            changeset, merge it.
            """
            message_deps = self.dependencies.get((path, ident), None)

            # Don't merge if we don't have a transform for this message.
            if message_deps is None:
                return False

            # As a special case, if a transform exists but has no
            # dependencies, it's a hardcoded `FTL.Node` which doesn't
            # migrate any existing translation but rather creates a new
            # one. Merge it.
            if len(message_deps) == 0:
                return True

            # If the intersection of the dependencies and the current
            # changeset is non-empty, merge this message.
            return message_deps & changeset

        # Merge legacy translations with the existing ones using the
        # reference as a template.
        snapshot = merge_resource(
            self, reference, current, transforms, in_changeset
        )

        # Skip this path if the messages in the merged snapshot are
        # identical to those in the current state of the localization file.
        # This may happen when:
        #
        #   - none of the transforms is in the changeset, or
        #   - all messages which would be migrated by the context's
        #     transforms already exist in the current state.
        if self.messages_equal(current, snapshot):
            continue

        # Store the merged snapshot on the context so that the next merge
        # already takes it into account as the existing localization.
        self.localization_resources[path] = snapshot

        # The result for this path is a complete `FTL.Resource`.
        yield path, snapshot

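# Hedged driver sketch (the context variable and output handling are
# assumptions based on typical fluent.migrate usage, not taken from this
# file): once add_transforms() has been called, merge_changeset() yields one
# merged FTL.Resource per target path, which can then be serialized to disk.
def example_merge_changeset(ctx):
    from fluent.syntax import FluentSerializer
    serializer = FluentSerializer()
    for path, snapshot in ctx.merge_changeset():
        with open(path, 'w', encoding='utf-8') as outfile:
            outfile.write(serializer.serialize(snapshot))
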
def serialize(self, out):
    body = [unit.to_entry() for unit in self.units]
    out.write(serialize(ast.Resource(body)).encode(self.encoding))