예제 #1
0
 def __init__(self,
              personality="java",
              blankmsgstr=False,
              duplicatestyle="msgctxt"):
     """Remember the conversion options and create the unit mixer.

     The three arguments are stored unchanged on the instance under the
     same names.  ``mixedkeys`` starts out as an empty dict and ``mixer``
     is a ``UnitMixer`` built from the label and accesskey suffixes
     declared by ``properties``.
     """
     # Plain option storage.
     self.personality = personality
     self.blankmsgstr = blankmsgstr
     self.duplicatestyle = duplicatestyle
     # Nothing has been paired yet.
     self.mixedkeys = {}
     label_suffixes = properties.labelsuffixes
     accesskey_suffixes = properties.accesskeysuffixes
     self.mixer = UnitMixer(label_suffixes, accesskey_suffixes)
예제 #2
0
 def __init__(self,
              personality="java",
              blankmsgstr=False,
              duplicatestyle="msgctxt"):
     """Initialise the converter state.

     ``personality``, ``blankmsgstr`` and ``duplicatestyle`` are kept as
     instance attributes of the same name; ``mixedkeys`` is reset to an
     empty dict; ``mixer`` becomes a ``UnitMixer`` constructed from
     ``properties.labelsuffixes`` and ``properties.accesskeysuffixes``.
     """
     self.personality = personality
     self.blankmsgstr = blankmsgstr
     self.duplicatestyle = duplicatestyle
     self.mixedkeys = {}
     suffix_args = (properties.labelsuffixes, properties.accesskeysuffixes)
     self.mixer = UnitMixer(*suffix_args)
예제 #3
0
class prop2po:
    """convert a .properties file to a .po file for handling the translation.
    """
    def __init__(self, personality="java", blankmsgstr=False,
                 duplicatestyle="msgctxt"):
        """Store the converter options and set up label/accesskey mixing.

        Each argument is saved on the instance under its own name.
        ``mixedkeys`` starts as an empty dict and is replaced whenever a
        store is converted; ``mixer`` is a ``UnitMixer`` created from the
        suffix lists exposed by ``properties``.
        """
        self.personality = personality
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        self.mixedkeys = {}
        label_suffixes = properties.labelsuffixes
        accesskey_suffixes = properties.accesskeysuffixes
        self.mixer = UnitMixer(label_suffixes, accesskey_suffixes)

    def convertstore(self, thepropfile):
        """Convert a .properties store into a new .po store.

        Comments of units that convert to ``None`` are buffered and later
        prepended, as developer notes, to the next unit that is emitted.  If
        the first unit that is not discarded is blank, the buffered comments
        are attached to the PO header instead.

        :param thepropfile: the source store; its ``filename``, ``units``,
            ``id_index`` and ``makeindex()`` are used.
        :return: the resulting store, after optional plural folding (for the
            "gaia" and "gwt" personalities) and after
            ``removeduplicates(self.duplicatestyle)`` has been applied.
        """
        thetargetfile = po.pofile()
        # "mozilla" and "skype" get a header initialised with explicit
        # accelerator-marker and merge-on fields; every other personality
        # uses whatever header() returns.
        if self.personality in ("mozilla", "skype"):
            targetheader = thetargetfile.init_headers(
                x_accelerator_marker="&",
                x_merge_on="location",
            )
        else:
            targetheader = thetargetfile.header()
        targetheader.addnote("extracted from %s" % thepropfile.filename,
                             "developer")

        # Index the store and recompute which keys form label/accesskey
        # pairs; convertpropunit() consults self.mixedkeys.
        thepropfile.makeindex()
        self.mixedkeys = self.mixer.match_entities(thepropfile.id_index)
        # we try and merge the header po with any comments at the start of the
        # properties file
        appendedheader = False
        waitingcomments = []
        for propunit in thepropfile.units:
            try:
                pounit = self.convertpropunit(thepropfile, propunit,
                                              "developer")
            except DiscardUnit:
                # The unit asked to be dropped: skip it completely.
                continue
            if pounit is None:
                # Nothing to emit for this unit; keep its comments so they
                # can be attached to a later unit (or to the header).
                waitingcomments.extend(propunit.comments)
            if not appendedheader:
                # Only the first unit reaching this point is considered for
                # the header, whether or not it turns out to be blank.
                if propunit.isblank():
                    targetheader.addnote("\n".join(waitingcomments).rstrip(),
                                         "developer",
                                         position="prepend")
                    waitingcomments = []
                    pounit = None
                appendedheader = True
            if pounit is not None:
                # Flush the buffered comments onto the unit being emitted.
                pounit.addnote("\n".join(waitingcomments).rstrip(),
                               "developer",
                               position="prepend")
                waitingcomments = []
                thetargetfile.addunit(pounit)
        # Personality-specific post-processing replaces the store with a
        # folded one before duplicates are handled.
        if self.personality == "gaia":
            thetargetfile = self.fold_gaia_plurals(thetargetfile)
        elif self.personality == "gwt":
            thetargetfile = self.fold_gwt_plurals(thetargetfile)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origpropfile, translatedpropfile):
        """Convert an original and a translated .properties store to one .po.

        Units come from ``origpropfile``; for each one, a unit with the same
        name is looked up in ``translatedpropfile.locationindex`` and, unless
        ``self.blankmsgstr`` is set, its source becomes the target of the
        emitted unit.  Comment buffering and header handling mirror what is
        done for single-file conversion, except that buffered comments are
        joined with the empty string here.

        :param origpropfile: store providing the source text.
        :param translatedpropfile: store providing the translations.
        :return: the resulting store, after optional plural folding (for the
            "gaia" and "gwt" personalities) and after
            ``removeduplicates(self.duplicatestyle)`` has been applied.
        """
        thetargetfile = po.pofile()
        # "mozilla" and "skype" get a header initialised with explicit
        # accelerator-marker and merge-on fields.
        if self.personality in ("mozilla", "skype"):
            targetheader = thetargetfile.init_headers(
                x_accelerator_marker="&",
                x_merge_on="location",
            )
        else:
            targetheader = thetargetfile.header()
        targetheader.addnote(
            "extracted from %s, %s" %
            (origpropfile.filename, translatedpropfile.filename), "developer")
        origpropfile.makeindex()
        #TODO: self.mixedkeys is overwritten below, so this is useless:
        self.mixedkeys = self.mixer.match_entities(origpropfile.id_index)
        translatedpropfile.makeindex()
        # NOTE(review): after this line self.mixedkeys reflects only the
        # translated store, yet it is also consulted while converting units
        # of the original store below - confirm this is intended.
        self.mixedkeys = self.mixer.match_entities(translatedpropfile.id_index)
        # we try and merge the header po with any comments at the start of
        # the properties file
        appendedheader = False
        waitingcomments = []
        # loop through the original file, looking at units one by one
        for origprop in origpropfile.units:
            try:
                origpo = self.convertpropunit(origpropfile, origprop,
                                              "developer")
            except DiscardUnit:
                # The original unit asked to be dropped: skip it completely.
                continue
            if origpo is None:
                # Nothing to emit; keep the comments for a later unit.
                waitingcomments.extend(origprop.comments)
            # handle the header case specially...
            if not appendedheader:
                if origprop.isblank():
                    targetheader.addnote("".join(waitingcomments).rstrip(),
                                         "developer",
                                         position="prepend")
                    waitingcomments = []
                    origpo = None
                appendedheader = True
            # try and find a translation of the same name...
            if origprop.name in translatedpropfile.locationindex:
                translatedprop = translatedpropfile.locationindex[
                    origprop.name]
                # Need to check that this comment is not a copy of the
                # developer comments
                try:
                    translatedpo = self.convertpropunit(
                        translatedpropfile, translatedprop, "translator")
                except DiscardUnit:
                    # A discarded translation also drops the original unit;
                    # any buffered comments stay buffered.
                    continue
            else:
                translatedpo = None
            # if we have a valid po unit, get the translation and add it...
            if origpo is not None:
                if translatedpo is not None and not self.blankmsgstr:
                    origpo.target = translatedpo.source
                origpo.addnote("".join(waitingcomments).rstrip(),
                               "developer",
                               position="prepend")
                waitingcomments = []
                thetargetfile.addunit(origpo)
            elif translatedpo is not None:
                # A translation exists but the original produced no unit.
                logger.error(
                    "didn't convert original property definition '%s'",
                    origprop.name)
        # Personality-specific post-processing replaces the store with a
        # folded one before duplicates are handled.
        if self.personality == "gaia":
            thetargetfile = self.fold_gaia_plurals(thetargetfile)
        elif self.personality == "gwt":
            thetargetfile = self.fold_gwt_plurals(thetargetfile)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def fold_gwt_plurals(self, postore):
        """Fold the multiple plural units of a gwt file into a gettext plural.

        Translatable units are grouped by the part of their first location
        that precedes an optional ``[variant]`` suffix.  Every group becomes
        a single unit in a new store of the same type as ``postore``; its
        source and target are lists built from the group's variants, taken
        in the order of ``data.cldr_plural_categories``.

        :param postore: the store to fold.  Non-translatable units are
            skipped.  The ``source``/``target`` of the remaining units are
            rewritten in place while folding.
        :return: a new store holding one unit per key.
        :raises ValueError: if a unit uses an unknown variant name, or if a
            key has no variant-less ("other") unit.
        """
        import re

        from translate.lang import data

        def _append_plural_unit(plural_unit, units, location):
            # ``location`` is passed explicitly rather than read from the
            # enclosing loop variable.
            sources = [u.source for u in units]
            targets = [u.target for u in units]
            # TODO: only consider the right ones for sources and targets
            plural_unit.source = sources
            plural_unit.target = targets
            plural_unit.addlocation(location)

        # Map GWT variants to cldr names
        gwt2cldr = {
            'none': 'zero',
            'one': 'one',
            'two': 'two',
            'few': 'few',
            'many': 'many',
            '': 'other',
        }

        class Variants(object):
            """The folded unit for one key plus its variant units."""

            def __init__(self, unit):
                self.unit = unit
                self.variants = {}

        # group(1) is the key, group(2) the optional text inside [...]
        regex = re.compile(r'([^\[\]]*)(?:\[(.*)\])?')
        names = data.cldr_plural_categories
        new_store = type(postore)()
        plurals = {}
        for unit in postore.units:
            if not unit.istranslatable():
                #TODO: reconsider: we could lose header comments here
                continue
            string = unit.getlocations()[0]
            match = regex.match(string)
            if not match:
                logger.warning("Invalid key: %s", string)
                continue
            key = match.group(1)
            # No variant => other
            gwt_variant = match.group(2) or ""
            if key not in plurals:
                # Placeholder unit for this key; its source is overwritten
                # once the group is folded below.
                new_unit = new_store.addsourceunit("fish")
                plurals[key] = Variants(new_unit)

            # Translate gwt variants to cldr names
            variant = gwt2cldr.get(gwt_variant)

            # Some sanity checks
            if not variant:
                raise ValueError("Variant invalid: %s" % (gwt_variant))
            if variant in plurals[key].variants:
                logger.warning("Override %s[%s]: %s by %s",
                               key, variant,
                               plurals[key].variants[variant], unit)

            # Put the unit
            plurals[key].variants[variant] = unit

        # Rework the set
        for key, plural in plurals.items():
            # We should have at least "other" (no variant in GWT)
            if "other" not in plural.variants:
                raise ValueError(
                    "Should have property %s without any variant" % (key))
            units = []
            for name in names:
                if name in plural.variants:
                    unit = plural.variants[name]
                    # The variant's own text becomes its target.
                    unit.target = unit.source
                    units.append(unit)
            # Replace the sources by good ones
            if "one" in plural.variants and len(units) > 0:
                units[0].source = plural.variants["one"].source
            if "other" in plural.variants and len(units) > 1:
                units[1].source = plural.variants["other"].source

            # Create the plural unit
            _append_plural_unit(plural.unit, units, key)
        return new_store

    def fold_gaia_plurals(self, postore):
        """Fold the multiple plural units of a gaia file into a gettext plural."""
        def _append_plural_unit(store, plurals, plural):
            units = plurals[plural]
            sources = [u.source for u in units]
            targets = [u.target for u in units]
            # TODO: only consider the right ones for sources and targets
            plural_unit = store.addsourceunit(sources)
            plural_unit.target = targets
            plural_unit.addlocation(plural)
            del plurals[plural]

        new_store = type(postore)()
        plurals = {}
        current_plural = ""
        for unit in postore.units:
            if not unit.istranslatable():
                #TODO: reconsider: we could lose header comments here
                continue
            if "plural(n)" in unit.source:
                if current_plural:
                    # End of a set of plural units
                    _append_plural_unit(new_store, plurals, current_plural)
                    current_plural = ""
                # start of a set of plural units
                location = unit.getlocations()[0]
                current_plural = location
                plurals[location] = []
                # We ignore the first one, since it doesn't contain translatable
                # text, only a marker.
            else:
                location = unit.getlocations()[0]
                if current_plural and location.startswith(current_plural):
                    plurals[current_plural].append(unit)
                    if '[zero]' not in location:
                        # We want to keep [zero] cases separately translatable
                        continue
                elif current_plural:
                    # End of a set of plural units
                    _append_plural_unit(new_store, plurals, current_plural)
                    current_plural = ""

                new_store.addunit(unit)

        if current_plural:
            # The file ended with a set of plural units
            _append_plural_unit(new_store, plurals, current_plural)
            current_plural = ""

        # if everything went well, there should be nothing left in plurals
        if len(plurals) != 0:
            logger.warning("Not all plural units converted correctly:" +
                           "\n".join(plurals))
        return new_store

    def convertunit(self, propunit, commenttype):
        """Converts a .properties unit to a .po unit. Returns None if empty or
        not for translation.
        """
        if propunit is None:
            return None
        # escape unicode
        pounit = po.pounit(encoding="UTF-8")
        if hasattr(propunit, "comments"):
            for comment in propunit.comments:
                if "DONT_TRANSLATE" in comment:
                    raise DiscardUnit(comment)
            pounit.addnote(propunit.getnotes().rstrip(), commenttype)
        # TODO: handle multiline msgid
        if propunit.isblank():
            return None
        pounit.addlocation(propunit.name)
        pounit.source = propunit.source
        pounit.target = ""
        return pounit

    def convertmixedunit(self, labelprop, accesskeyprop, commenttype):
        label_unit = self.convertunit(labelprop, commenttype)
        accesskey_unit = self.convertunit(accesskeyprop, commenttype)
        if label_unit is None:
            return accesskey_unit
        if accesskey_unit is None:
            return label_unit
        target_unit = po.pounit(encoding="UTF-8")
        return self.mixer.mix_units(label_unit, accesskey_unit, target_unit)

    def convertpropunit(self,
                        store,
                        unit,
                        commenttype,
                        mixbucket="properties"):
        """Converts a unit from store to a po unit, keeping track of mixed
        names along the way.

        ``mixbucket`` can be specified to indicate if the given unit is part of
        the template or the translated file.
        """
        if self.personality != "mozilla" and self.personality != "gwt":
            # XXX should we enable unit mixing for other personalities?
            return self.convertunit(unit, commenttype)

        # keep track of whether accesskey and label were combined
        key = unit.getid()
        if key not in self.mixedkeys:
            return self.convertunit(unit, commenttype)

        # use special convertmixed unit which produces one pounit with
        # both combined for the label and None for the accesskey
        alreadymixed = self.mixedkeys[key].get(mixbucket, None)
        if alreadymixed:
            # we are successfully throwing this away...
            return None
        elif alreadymixed is False:
            # The mix failed before
            return self.convertunit(unit, commenttype)

        #assert alreadymixed is None
        labelkey, accesskeykey = self.mixer.find_mixed_pair(
            self.mixedkeys, store, unit)
        labelprop = store.id_index.get(labelkey, None)
        accesskeyprop = store.id_index.get(accesskeykey, None)
        po_unit = self.convertmixedunit(labelprop, accesskeyprop, commenttype)
        if po_unit is not None:
            if accesskeykey is not None:
                self.mixedkeys[accesskeykey][mixbucket] = True
            if labelkey is not None:
                self.mixedkeys[labelkey][mixbucket] = True
            return po_unit
        else:
            # otherwise the mix failed. add each one separately and
            # remember they weren't mixed
            if accesskeykey is not None:
                self.mixedkeys[accesskeykey][mixbucket] = False
            if labelkey is not None:
                self.mixedkeys[labelkey][mixbucket] = False

        return self.convertunit(unit, commenttype)
예제 #4
0
class prop2po:
    """convert a .properties file to a .po file for handling the
    translation."""

    def __init__(self,
                 personality="java",
                 blankmsgstr=False,
                 duplicatestyle="msgctxt"):
        """Keep the conversion options and prepare the unit mixer.

        The arguments are stored as same-named attributes.  ``mixedkeys``
        begins as an empty dict, and ``mixer`` is a ``UnitMixer`` built
        from ``properties.labelsuffixes`` and
        ``properties.accesskeysuffixes``.
        """
        self.personality = personality
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        self.mixedkeys = {}
        suffixes = (properties.labelsuffixes, properties.accesskeysuffixes)
        self.mixer = UnitMixer(*suffixes)

    def convertstore(self, thepropfile):
        """Convert a .properties store into a new .po store.

        Comments of units that convert to ``None`` are buffered and later
        prepended, as developer notes, to the next unit that is emitted.  If
        the first unit that is not discarded is blank, the buffered comments
        are attached to the PO header instead.  Units for which
        ``convertpropunit`` returns the string ``"discard"`` are skipped.

        :param thepropfile: the source store; its ``filename``, ``units``,
            ``id_index`` and ``makeindex()`` are used.
        :return: the resulting store, after optional plural folding (for the
            "gaia" personality) and after
            ``removeduplicates(self.duplicatestyle)`` has been applied.
        """
        thetargetfile = po.pofile()
        if self.personality in ("mozilla", "skype"):
            targetheader = thetargetfile.init_headers(
                x_accelerator_marker="&",
                x_merge_on="location",
            )
        else:
            targetheader = thetargetfile.header()
        targetheader.addnote("extracted from %s" % thepropfile.filename,
                             "developer")

        thepropfile.makeindex()
        self.mixedkeys = self.mixer.match_entities(thepropfile.id_index)
        # we try and merge the header po with any comments at the start of the
        # properties file
        appendedheader = False
        waitingcomments = []
        for propunit in thepropfile.units:
            pounit = self.convertpropunit(thepropfile, propunit, "developer")
            if pounit is None:
                waitingcomments.extend(propunit.comments)
            # FIXME the storage class should not be creating blank units
            # Compare the marker by value, not identity: ``is "discard"``
            # only works when both strings happen to be interned.  The
            # isinstance() guard keeps real units out of the ``==``
            # comparison, since their equality may not accept a string.
            if isinstance(pounit, str) and pounit == "discard":
                continue
            if not appendedheader:
                # Only the first unit reaching this point is considered for
                # the header.
                if propunit.isblank():
                    targetheader.addnote("\n".join(waitingcomments).rstrip(),
                                         "developer", position="prepend")
                    waitingcomments = []
                    pounit = None
                appendedheader = True
            if pounit is not None:
                pounit.addnote("\n".join(waitingcomments).rstrip(),
                               "developer", position="prepend")
                waitingcomments = []
                thetargetfile.addunit(pounit)
        if self.personality == "gaia":
            thetargetfile = self.fold_gaia_plurals(thetargetfile)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origpropfile, translatedpropfile):
        """Convert an original and a translated .properties store to one .po.

        Units come from ``origpropfile``; for each one, a unit with the same
        name is looked up in ``translatedpropfile.locationindex`` and, unless
        ``self.blankmsgstr`` is set, its source becomes the target of the
        emitted unit.  A unit is skipped when either its own conversion or
        the conversion of its translation returns the string ``"discard"``.

        :param origpropfile: store providing the source text.
        :param translatedpropfile: store providing the translations.
        :return: the resulting store, after optional plural folding (for the
            "gaia" personality) and after
            ``removeduplicates(self.duplicatestyle)`` has been applied.
        """
        def is_discarded(converted):
            # Compare the marker by value, not identity: ``is "discard"``
            # only works when both strings happen to be interned.  The
            # isinstance() guard keeps real units out of the ``==``
            # comparison, since their equality may not accept a string.
            return isinstance(converted, str) and converted == "discard"

        thetargetfile = po.pofile()
        if self.personality in ("mozilla", "skype"):
            targetheader = thetargetfile.init_headers(
                x_accelerator_marker="&",
                x_merge_on="location",
            )
        else:
            targetheader = thetargetfile.header()
        targetheader.addnote(
            "extracted from %s, %s" % (origpropfile.filename,
                                       translatedpropfile.filename),
            "developer")
        origpropfile.makeindex()
        #TODO: self.mixedkeys is overwritten below, so this is useless:
        self.mixedkeys = self.mixer.match_entities(origpropfile.id_index)
        translatedpropfile.makeindex()
        self.mixedkeys = self.mixer.match_entities(translatedpropfile.id_index)
        # we try and merge the header po with any comments at the start of
        # the properties file
        appendedheader = False
        waitingcomments = []
        # loop through the original file, looking at units one by one
        for origprop in origpropfile.units:
            origpo = self.convertpropunit(origpropfile, origprop, "developer")
            if origpo is None:
                waitingcomments.extend(origprop.comments)
            # FIXME the storage class should not be creating blank units
            if is_discarded(origpo):
                continue
            # handle the header case specially...
            if not appendedheader:
                if origprop.isblank():
                    targetheader.addnote(u"".join(waitingcomments).rstrip(),
                                         "developer", position="prepend")
                    waitingcomments = []
                    origpo = None
                appendedheader = True
            # try and find a translation of the same name...
            if origprop.name in translatedpropfile.locationindex:
                translatedprop = translatedpropfile.locationindex[origprop.name]
                # Need to check that this comment is not a copy of the
                # developer comments
                translatedpo = self.convertpropunit(translatedpropfile,
                                                    translatedprop,
                                                    "translator")
                if is_discarded(translatedpo):
                    # A discarded translation also drops the original unit.
                    continue
            else:
                translatedpo = None
            # if we have a valid po unit, get the translation and add it...
            if origpo is not None:
                if translatedpo is not None and not self.blankmsgstr:
                    origpo.target = translatedpo.source
                origpo.addnote(u"".join(waitingcomments).rstrip(),
                               "developer", position="prepend")
                waitingcomments = []
                thetargetfile.addunit(origpo)
            elif translatedpo is not None:
                logger.error("didn't convert original property definition '%s'",
                             origprop.name)
        if self.personality == "gaia":
            thetargetfile = self.fold_gaia_plurals(thetargetfile)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def fold_gaia_plurals(self, postore):
        """Fold the multiple plural units of a gaia file into a gettext plural."""

        def _append_plural_unit(store, plurals, plural):
            units = plurals[plural]
            sources = [u.source for u in units]
            targets = [u.target for u in units]
            # TODO: only consider the right ones for sources and targets
            plural_unit = store.addsourceunit(sources)
            plural_unit.target = targets
            plural_unit.addlocation(plural)
            del plurals[plural]

        new_store = type(postore)()
        plurals = {}
        current_plural = u""
        for unit in postore.units:
            if not unit.istranslatable():
                #TODO: reconsider: we could lose header comments here
                continue
            if u"plural(n)" in unit.source:
                if current_plural:
                    # End of a set of plural units
                    _append_plural_unit(new_store, plurals, current_plural)
                    current_plural = u""
                # start of a set of plural units
                location = unit.getlocations()[0]
                current_plural = location
                plurals[location] = []
                # We ignore the first one, since it doesn't contain translatable
                # text, only a marker.
            else:
                location = unit.getlocations()[0]
                if current_plural and location.startswith(current_plural):
                    plurals[current_plural].append(unit)
                    if not '[zero]' in location:
                        # We want to keep [zero] cases separately translatable
                        continue
                elif current_plural:
                    # End of a set of plural units
                    _append_plural_unit(new_store, plurals, current_plural)
                    current_plural = u""

                new_store.addunit(unit)

        if current_plural:
            # The file ended with a set of plural units
            _append_plural_unit(new_store, plurals, current_plural)
            current_plural = u""

        # if everything went well, there should be nothing left in plurals
        if len(plurals) != 0:
            logger.warning("Not all plural units converted correctly:" +
                           "\n".join(plurals.keys()))
        return new_store

    def convertunit(self, propunit, commenttype):
        """Converts a .properties unit to a .po unit. Returns None if empty
        or not for translation."""
        if propunit is None:
            return None
        # escape unicode
        pounit = po.pounit(encoding="UTF-8")
        if hasattr(propunit, "comments"):
            for comment in propunit.comments:
                if "DONT_TRANSLATE" in comment:
                    return "discard"
            pounit.addnote(u"".join(propunit.getnotes()).rstrip(), commenttype)
        # TODO: handle multiline msgid
        if propunit.isblank():
            return None
        pounit.addlocation(propunit.name)
        pounit.source = propunit.source
        pounit.target = u""
        return pounit

    def convertmixedunit(self, labelprop, accesskeyprop, commenttype):
        label_unit = self.convertunit(labelprop, commenttype)
        accesskey_unit = self.convertunit(accesskeyprop, commenttype)
        if label_unit is None:
            return accesskey_unit
        if accesskey_unit is None:
            return label_unit
        target_unit = po.pounit(encoding="UTF-8")
        return self.mixer.mix_units(label_unit, accesskey_unit, target_unit)

    def convertpropunit(self, store, unit, commenttype, mixbucket="dtd"):
        """Converts a unit from store to a po unit, keeping track of mixed
        names along the way.

        ``mixbucket`` can be specified to indicate if the given unit is part of
        the template or the translated file.
        """
        if self.personality != "mozilla":
            # XXX should we enable unit mixing for other personalities?
            return self.convertunit(unit, commenttype)

        # keep track of whether accesskey and label were combined
        key = unit.getid()
        if key not in self.mixedkeys:
            return self.convertunit(unit, commenttype)

        # use special convertmixed unit which produces one pounit with
        # both combined for the label and None for the accesskey
        alreadymixed = self.mixedkeys[key].get(mixbucket, None)
        if alreadymixed:
            # we are successfully throwing this away...
            return None
        elif alreadymixed is False:
            # The mix failed before
            return self.convertunit(unit, commenttype)

        #assert alreadymixed is None
        labelkey, accesskeykey = self.mixer.find_mixed_pair(self.mixedkeys, store, unit)
        labelprop = store.id_index.get(labelkey, None)
        accesskeyprop = store.id_index.get(accesskeykey, None)
        po_unit = self.convertmixedunit(labelprop, accesskeyprop, commenttype)
        if po_unit is not None:
            if accesskeykey is not None:
                self.mixedkeys[accesskeykey][mixbucket] = True
            if labelkey is not None:
                self.mixedkeys[labelkey][mixbucket] = True
            return po_unit
        else:
            # otherwise the mix failed. add each one separately and
            # remember they weren't mixed
            if accesskeykey is not None:
                self.mixedkeys[accesskeykey][mixbucket] = False
            if labelkey is not None:
                self.mixedkeys[labelkey][mixbucket] = False

        return self.convertunit(unit, commenttype)
예제 #5
0
 def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
     """Store the options and reset the per-conversion state.

     ``blankmsgstr`` and ``duplicatestyle`` are kept as same-named
     attributes.  ``currentgroup`` starts as ``None``, ``mixedentities``
     as an empty dict, and ``mixer`` is a ``UnitMixer`` built from the
     suffix lists exposed by ``dtd``.
     """
     self.currentgroup = None
     self.blankmsgstr = blankmsgstr
     self.duplicatestyle = duplicatestyle
     self.mixedentities = {}
     label_suffixes = dtd.labelsuffixes
     accesskey_suffixes = dtd.accesskeysuffixes
     self.mixer = UnitMixer(label_suffixes, accesskey_suffixes)
예제 #6
0
class dtd2po:
    """Convert .dtd stores into PO stores.

    Label and accesskey entities that the mixer pairs up are emitted as a
    single combined PO unit; everything else is converted one to one.
    """

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """Store the conversion options and reset the per-run state.

        :param blankmsgstr: when true, :meth:`mergestore` leaves every
            target empty instead of copying the translation in.
        :param duplicatestyle: passed to ``removeduplicates`` on the
            resulting PO store.
        """
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        self.mixedentities = {}
        self.mixer = UnitMixer(dtd.labelsuffixes, dtd.accesskeysuffixes)

    def convertcomments(self, dtd_unit, po_unit):
        """Copy the location and the comments of ``dtd_unit`` to ``po_unit``.

        Group start/end comments update ``self.currentgroup``; while a group
        is open its (stripped) text is added as a translator note.
        """
        entity = dtd_unit.getid()
        if entity:
            po_unit.addlocation(entity)
        for commenttype, comment in dtd_unit.comments:
            # Track the localization group we are currently inside of.
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            # Automatic comments were already cleaned up by convertunit;
            # every other comment still needs its markers stripped.
            if commenttype == "automaticcomment":
                po_unit.addnote(comment, origin="developer")
            else:
                po_unit.addnote(quote.stripcomment(comment),
                                origin="developer")
        if self.currentgroup is not None:
            po_unit.addnote(quote.stripcomment(self.currentgroup),
                            origin="translator")
        if is_css_entity(entity):
            po_unit.addnote("Do not translate this.  Only change the numeric values if you need this dialogue box to appear bigger",
                            origin="developer")

    def convertstrings(self, dtd_unit, po_unit):
        """Set ``po_unit.source`` from ``dtd_unit.source``; blank the target.

        Carriage returns are removed and blank lines at either end are
        dropped.  In a multi-line value the first line is right-stripped,
        inner lines are stripped and the last line is left-stripped; every
        line but the last gets a single trailing space.
        """
        lines = dtd_unit.source.replace("\r", "").split('\n')
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if len(lines) > 1:
            parts = [lines[0].rstrip() + ' ']
            parts.extend(line.strip() + ' ' for line in lines[1:-1])
            parts.append(lines[-1].lstrip())
            po_unit.source = "\n".join(parts)
        elif lines:
            po_unit.source = lines[0]
        else:
            po_unit.source = ""
        po_unit.target = ""

    def convertunit(self, dtd_unit):
        """Converts a simple (non-mixed) dtd unit into a po unit.

        Localization notes addressed to this entity are rewritten in place
        on ``dtd_unit``: a ``DONT_TRANSLATE`` note blanks the unit's id and
        source, any other note becomes an ``automaticcomment``.

        Returns None if empty or not for translation.
        """
        if dtd_unit is None:
            return None
        po_unit = po.pounit(encoding="UTF-8")
        for commentnum, (commenttype, locnote) in enumerate(dtd_unit.comments):
            if commenttype != 'locnote':
                continue
            # Split the locnote into the entity it names and the note text.
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart + 1)
            if idend == -1:
                # Unterminated "(entity": there is no usable id to match.
                continue
            entity = locnote[idstart + 1:idend].strip()
            actualnotestart = locnote.find(':', idend + 1)
            actualnoteend = locnote.find('-->', idend)
            actualnote = locnote[actualnotestart + 1:actualnoteend].strip()
            if dtd_unit.getid() != entity:
                continue
            if actualnote.startswith("DONT_TRANSLATE"):
                # Take out the entity, its definition and the
                # DONT_TRANSLATE comment.  The list changed size, so stop
                # iterating right away.
                dtd_unit.setid("")
                dtd_unit.source = ""
                del dtd_unit.comments[commentnum]
                break
            # Turn it into an automatic comment for convertcomments.
            dtd_unit.comments[commentnum] = ("automaticcomment", actualnote)
        self.convertcomments(dtd_unit, po_unit)
        self.convertstrings(dtd_unit, po_unit)
        if po_unit.isblank() and not po_unit.getlocations():
            return None
        return po_unit

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """Convert a label/accesskey pair into one PO unit.

        If only one of the two converts to a unit, that unit is returned on
        its own; if neither does, None is returned.  Otherwise the result
        of the mixer's ``mix_units`` is returned.
        """
        label_unit = self.convertunit(labeldtd)
        accesskey_unit = self.convertunit(accesskeydtd)
        if label_unit is None:
            return accesskey_unit
        if accesskey_unit is None:
            return label_unit
        target_unit = po.pounit(encoding="UTF-8")
        return self.mixer.mix_units(label_unit, accesskey_unit, target_unit)

    def convertdtdunit(self, store, unit, mixbucket="dtd"):
        """Converts a unit from store to a po unit, keeping track of mixed
        entities along the way.

        ``mixbucket`` can be specified to indicate if the given unit is part of
        the template or the translated file.

        Returns None for an entity that was already emitted as part of a
        successfully mixed unit in this bucket.
        """
        entity = unit.getid()
        if entity not in self.mixedentities:
            return self.convertunit(unit)

        alreadymixed = self.mixedentities[entity].get(mixbucket, None)
        if alreadymixed:
            # The partner already produced the combined unit; drop this one.
            return None
        elif alreadymixed is False:
            # The mix failed before, so convert the unit on its own.
            return self.convertunit(unit)

        # First time we see this pair in this bucket: try to mix it.
        labelentity, accesskeyentity = self.mixer.find_mixed_pair(
            self.mixedentities, store, unit)
        labeldtd = store.index.get(labelentity, None)
        accesskeydtd = store.index.get(accesskeyentity, None)
        po_unit = self.convertmixedunit(labeldtd, accesskeydtd)
        mixed = po_unit is not None
        # Remember the outcome for both halves of the pair.
        for name in (accesskeyentity, labelentity):
            if name is not None:
                self.mixedentities[name][mixbucket] = mixed
        if mixed:
            return po_unit
        return self.convertunit(unit)

    def convertstore(self, dtd_store):
        """Convert every translatable unit of ``dtd_store`` into a new PO
        store and return it."""
        target_store = po.pofile()
        targetheader = target_store.init_headers(
            x_accelerator_marker="&",
            x_merge_on="location",
        )
        targetheader.addnote("extracted from %s" % dtd_store.filename,
                             "developer")

        dtd_store.makeindex()
        self.mixedentities = self.mixer.match_entities(dtd_store.index)
        for dtd_unit in dtd_store.units:
            if not dtd_unit.istranslatable():
                continue
            po_unit = self.convertdtdunit(dtd_store, dtd_unit)
            if po_unit is not None:
                target_store.addunit(po_unit)
        target_store.removeduplicates(self.duplicatestyle)
        return target_store

    def mergestore(self, origdtdfile, translateddtdfile):
        """Build a PO store whose sources come from ``origdtdfile`` and
        whose targets come from the matching entities of
        ``translateddtdfile`` (unless ``blankmsgstr`` is set)."""
        target_store = po.pofile()
        targetheader = target_store.init_headers(
            x_accelerator_marker="&",
            x_merge_on="location",
        )
        targetheader.addnote("extracted from %s, %s" %
                             (origdtdfile.filename,
                              translateddtdfile.filename),
                             "developer")

        origdtdfile.makeindex()
        # TODO: the result of this first call is overwritten just below;
        # it is kept only in case match_entities has side effects.
        self.mixedentities = self.mixer.match_entities(origdtdfile.index)
        translateddtdfile.makeindex()
        self.mixedentities = self.mixer.match_entities(translateddtdfile.index)
        for origdtd in origdtdfile.units:
            if not origdtd.istranslatable():
                continue
            origpo = self.convertdtdunit(origdtdfile, origdtd,
                                         mixbucket="orig")
            orig_entity = origdtd.getid()
            # Decide which bucket the translated unit is converted under.
            # This runs even when origpo is None because it may prune
            # self.mixedentities.
            if orig_entity in self.mixedentities:
                mixedentitydict = self.mixedentities[orig_entity]
                if "orig" not in mixedentitydict:
                    # Mixed in the translation but not in the original:
                    # treat as unmixed.
                    mixbucket = "orig"
                    del self.mixedentities[orig_entity]
                elif mixedentitydict["orig"]:
                    # The original entity was mixed successfully.
                    mixbucket = "translate"
                else:
                    # Mixing the original failed.
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # A mixed entity (with accesskey) that was already handled.
                continue
            translatedpo = None
            if orig_entity in translateddtdfile.index:
                translateddtd = translateddtdfile.index[orig_entity]
                translatedpo = self.convertdtdunit(translateddtdfile,
                                                   translateddtd,
                                                   mixbucket=mixbucket)
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            target_store.addunit(origpo)
        target_store.removeduplicates(self.duplicatestyle)
        return target_store
예제 #7
0
class prop2po:
    """convert a .properties file to a .po file for handling the
    translation."""

    def __init__(self,
                 personality="java",
                 blankmsgstr=False,
                 duplicatestyle="msgctxt"):
        """Store the conversion options and reset the per-run state.

        :param personality: properties dialect; "mozilla" and "skype" get
            accelerator/merge headers, "mozilla" additionally enables
            label/accesskey mixing and "gaia" enables plural folding.
        :param blankmsgstr: when true, :meth:`mergestore` leaves every
            target empty instead of copying the translation in.
        :param duplicatestyle: passed to ``removeduplicates`` on the
            resulting PO store.
        """
        self.personality = personality
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        self.mixedkeys = {}
        self.mixer = UnitMixer(properties.labelsuffixes,
                               properties.accesskeysuffixes)

    @staticmethod
    def _is_discard(unit):
        """Return True if ``unit`` is the "discard" marker of convertunit.

        The type is checked first so that real PO units are never compared
        with a string (their ``__eq__`` is not called), and the marker is
        compared by value rather than by identity.
        """
        return isinstance(unit, str) and unit == "discard"

    def convertstore(self, thepropfile):
        """converts a .properties file to a .po file..."""
        thetargetfile = po.pofile()
        if self.personality in ("mozilla", "skype"):
            targetheader = thetargetfile.init_headers(
                x_accelerator_marker="&",
                x_merge_on="location",
            )
        else:
            targetheader = thetargetfile.header()
        targetheader.addnote("extracted from %s" % thepropfile.filename,
                             "developer")

        thepropfile.makeindex()
        self.mixedkeys = self.mixer.match_entities(thepropfile.id_index)
        # we try and merge the header po with any comments at the start of the
        # properties file
        appendedheader = False
        waitingcomments = []
        for propunit in thepropfile.units:
            pounit = self.convertpropunit(thepropfile, propunit, "developer")
            if pounit is None:
                # Nothing to emit: keep the comments for the next real unit.
                waitingcomments.extend(propunit.comments)
            # FIXME the storage class should not be creating blank units
            if self._is_discard(pounit):
                continue
            if not appendedheader:
                if propunit.isblank():
                    # Comments before the first real unit go to the header.
                    targetheader.addnote("\n".join(waitingcomments).rstrip(),
                                         "developer",
                                         position="prepend")
                    waitingcomments = []
                    pounit = None
                appendedheader = True
            if pounit is not None:
                pounit.addnote("\n".join(waitingcomments).rstrip(),
                               "developer",
                               position="prepend")
                waitingcomments = []
                thetargetfile.addunit(pounit)
        if self.personality == "gaia":
            thetargetfile = self.fold_gaia_plurals(thetargetfile)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origpropfile, translatedpropfile):
        """converts two .properties files to a .po file..."""
        thetargetfile = po.pofile()
        if self.personality in ("mozilla", "skype"):
            targetheader = thetargetfile.init_headers(
                x_accelerator_marker="&",
                x_merge_on="location",
            )
        else:
            targetheader = thetargetfile.header()
        targetheader.addnote(
            "extracted from %s, %s" %
            (origpropfile.filename, translatedpropfile.filename), "developer")
        origpropfile.makeindex()
        # TODO: the result of this first call is overwritten just below;
        # it is kept only in case match_entities has side effects.
        self.mixedkeys = self.mixer.match_entities(origpropfile.id_index)
        translatedpropfile.makeindex()
        self.mixedkeys = self.mixer.match_entities(translatedpropfile.id_index)
        # we try and merge the header po with any comments at the start of
        # the properties file
        appendedheader = False
        waitingcomments = []
        # loop through the original file, looking at units one by one
        for origprop in origpropfile.units:
            origpo = self.convertpropunit(origpropfile, origprop, "developer")
            if origpo is None:
                waitingcomments.extend(origprop.comments)
            # FIXME the storage class should not be creating blank units
            if self._is_discard(origpo):
                continue
            # handle the header case specially...
            if not appendedheader:
                if origprop.isblank():
                    targetheader.addnote(u"".join(waitingcomments).rstrip(),
                                         "developer",
                                         position="prepend")
                    waitingcomments = []
                    origpo = None
                appendedheader = True
            # try and find a translation of the same name...
            translatedpo = None
            if origprop.name in translatedpropfile.locationindex:
                translatedprop = translatedpropfile.locationindex[
                    origprop.name]
                translatedpo = self.convertpropunit(translatedpropfile,
                                                    translatedprop,
                                                    "translator")
                if self._is_discard(translatedpo):
                    continue
            # if we have a valid po unit, get the translation and add it...
            if origpo is not None:
                if translatedpo is not None and not self.blankmsgstr:
                    origpo.target = translatedpo.source
                origpo.addnote(u"".join(waitingcomments).rstrip(),
                               "developer",
                               position="prepend")
                waitingcomments = []
                thetargetfile.addunit(origpo)
            elif translatedpo is not None:
                logger.error(
                    "didn't convert original property definition '%s'",
                    origprop.name)
        if self.personality == "gaia":
            thetargetfile = self.fold_gaia_plurals(thetargetfile)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def fold_gaia_plurals(self, postore):
        """Fold the multiple plural units of a gaia file into a gettext plural.

        Returns a new store of the same type as ``postore``; untranslatable
        units (such as the header) are not copied over.
        """
        def _append_plural_unit(store, plurals, plural):
            """Add one plural unit built from the forms collected under
            ``plural`` and forget that entry."""
            units = plurals[plural]
            sources = [u.source for u in units]
            targets = [u.target for u in units]
            # TODO: only consider the right ones for sources and targets
            plural_unit = store.addsourceunit(sources)
            plural_unit.target = targets
            plural_unit.addlocation(plural)
            del plurals[plural]

        new_store = type(postore)()
        plurals = {}
        current_plural = u""
        for unit in postore.units:
            if not unit.istranslatable():
                #TODO: reconsider: we could lose header comments here
                continue
            if u"plural(n)" in unit.source:
                if current_plural:
                    # End of a set of plural units
                    _append_plural_unit(new_store, plurals, current_plural)
                    current_plural = u""
                # Start of a set of plural units.  The marker unit itself is
                # not kept: it holds no translatable text.
                location = unit.getlocations()[0]
                current_plural = location
                plurals[location] = []
            else:
                location = unit.getlocations()[0]
                if current_plural and location.startswith(current_plural):
                    plurals[current_plural].append(unit)
                    if '[zero]' not in location:
                        # We want to keep [zero] cases separately translatable
                        continue
                elif current_plural:
                    # End of a set of plural units
                    _append_plural_unit(new_store, plurals, current_plural)
                    current_plural = u""

                new_store.addunit(unit)

        if current_plural:
            # The file ended with a set of plural units
            _append_plural_unit(new_store, plurals, current_plural)
            current_plural = u""

        # if everything went well, there should be nothing left in plurals
        if plurals:
            logger.warning("Not all plural units converted correctly:%s",
                           "\n".join(plurals))
        return new_store

    def convertunit(self, propunit, commenttype):
        """Converts a .properties unit to a .po unit. Returns None if empty
        or not for translation.

        Returns the string "discard" when one of the unit's comments
        contains ``DONT_TRANSLATE``.
        """
        if propunit is None:
            return None
        has_comments = hasattr(propunit, "comments")
        # Decide about discarding before any PO unit is allocated.
        if has_comments and any("DONT_TRANSLATE" in comment
                                for comment in propunit.comments):
            return "discard"
        pounit = po.pounit(encoding="UTF-8")
        if has_comments:
            pounit.addnote(u"".join(propunit.getnotes()).rstrip(), commenttype)
        # TODO: handle multiline msgid
        if propunit.isblank():
            return None
        pounit.addlocation(propunit.name)
        pounit.source = propunit.source
        pounit.target = u""
        return pounit

    def convertmixedunit(self, labelprop, accesskeyprop, commenttype):
        """Convert a label/accesskey pair into one PO unit.

        If only one of the two converts to a unit, that result is returned
        on its own; if neither does, None is returned.  Otherwise the
        result of the mixer's ``mix_units`` is returned.
        """
        label_unit = self.convertunit(labelprop, commenttype)
        accesskey_unit = self.convertunit(accesskeyprop, commenttype)
        if label_unit is None:
            return accesskey_unit
        if accesskey_unit is None:
            return label_unit
        target_unit = po.pounit(encoding="UTF-8")
        return self.mixer.mix_units(label_unit, accesskey_unit, target_unit)

    def convertpropunit(self, store, unit, commenttype, mixbucket="dtd"):
        """Converts a unit from store to a po unit, keeping track of mixed
        names along the way.

        ``mixbucket`` can be specified to indicate if the given unit is part of
        the template or the translated file.

        Returns None for a key that was already emitted as part of a
        successfully mixed unit in this bucket.
        """
        if self.personality != "mozilla":
            # XXX should we enable unit mixing for other personalities?
            return self.convertunit(unit, commenttype)

        key = unit.getid()
        if key not in self.mixedkeys:
            return self.convertunit(unit, commenttype)

        alreadymixed = self.mixedkeys[key].get(mixbucket, None)
        if alreadymixed:
            # The partner already produced the combined unit; drop this one.
            return None
        elif alreadymixed is False:
            # The mix failed before, so convert the unit on its own.
            return self.convertunit(unit, commenttype)

        # First time we see this pair in this bucket: try to mix it.
        labelkey, accesskeykey = self.mixer.find_mixed_pair(
            self.mixedkeys, store, unit)
        labelprop = store.id_index.get(labelkey, None)
        accesskeyprop = store.id_index.get(accesskeykey, None)
        po_unit = self.convertmixedunit(labelprop, accesskeyprop, commenttype)
        mixed = po_unit is not None
        # Remember the outcome for both halves of the pair.
        for name in (accesskeykey, labelkey):
            if name is not None:
                self.mixedkeys[name][mixbucket] = mixed
        if mixed:
            return po_unit
        return self.convertunit(unit, commenttype)
예제 #8
0
 def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
     """Keep the caller's options and start with empty conversion state.

     Both arguments are stored on the instance as given; ``currentgroup``
     is initialised to None and ``mixedentities`` to an empty dict.
     """
     # Start with no group and no mixed entities recorded.
     self.currentgroup = None
     self.mixedentities = {}
     # Caller-supplied options.
     self.blankmsgstr, self.duplicatestyle = blankmsgstr, duplicatestyle
     # Build the mixer from the suffix lists exposed by the dtd module.
     suffix_lists = (dtd.labelsuffixes, dtd.accesskeysuffixes)
     self.mixer = UnitMixer(*suffix_lists)
예제 #9
0
class dtd2po:
    """Convert .dtd stores into PO stores.

    Label and accesskey entities that the mixer pairs up are emitted as a
    single combined PO unit; everything else is converted one to one.
    """

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """Store the conversion options and reset the per-run state.

        :param blankmsgstr: when true, :meth:`mergestore` leaves every
            target empty instead of copying the translation in.
        :param duplicatestyle: passed to ``removeduplicates`` on the
            resulting PO store.
        """
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        self.mixedentities = {}
        self.mixer = UnitMixer(dtd.labelsuffixes, dtd.accesskeysuffixes)

    def convertcomments(self, dtd_unit, po_unit):
        """Copy the location and the comments of ``dtd_unit`` to ``po_unit``.

        Group start/end comments update ``self.currentgroup``; while a group
        is open its (stripped) text is added as a translator note.
        """
        entity = dtd_unit.getid()
        if entity:
            po_unit.addlocation(entity)
        for commenttype, comment in dtd_unit.comments:
            # Track the localization group we are currently inside of.
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            # Automatic comments were already cleaned up by convertunit;
            # every other comment still needs its markers stripped.
            if commenttype == "automaticcomment":
                po_unit.addnote(comment, origin="developer")
            else:
                po_unit.addnote(quote.stripcomment(comment),
                                origin="developer")
        if self.currentgroup is not None:
            po_unit.addnote(quote.stripcomment(self.currentgroup),
                            origin="translator")
        if is_css_entity(entity):
            po_unit.addnote(
                "Do not translate this.  Only change the numeric values if you need this dialogue box to appear bigger",
                origin="developer")

    def convertstrings(self, dtd_unit, po_unit):
        """Set ``po_unit.source`` from ``dtd_unit.source``; blank the target.

        Carriage returns are removed and blank lines at either end are
        dropped.  In a multi-line value the first line is right-stripped,
        inner lines are stripped and the last line is left-stripped; every
        line but the last gets a single trailing space.
        """
        lines = dtd_unit.source.replace("\r", "").split('\n')
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if len(lines) > 1:
            parts = [lines[0].rstrip() + ' ']
            parts.extend(line.strip() + ' ' for line in lines[1:-1])
            parts.append(lines[-1].lstrip())
            po_unit.source = "\n".join(parts)
        elif lines:
            po_unit.source = lines[0]
        else:
            po_unit.source = ""
        po_unit.target = ""

    def convertunit(self, dtd_unit):
        """Converts a simple (non-mixed) dtd unit into a po unit.

        Localization notes addressed to this entity are rewritten in place
        on ``dtd_unit``: a ``DONT_TRANSLATE`` note blanks the unit's id and
        source, any other note becomes an ``automaticcomment``.

        Returns None if empty or not for translation.
        """
        if dtd_unit is None:
            return None
        po_unit = po.pounit(encoding="UTF-8")
        for commentnum, (commenttype, locnote) in enumerate(dtd_unit.comments):
            if commenttype != 'locnote':
                continue
            # Split the locnote into the entity it names and the note text.
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart + 1)
            if idend == -1:
                # Unterminated "(entity": there is no usable id to match.
                continue
            entity = locnote[idstart + 1:idend].strip()
            actualnotestart = locnote.find(':', idend + 1)
            actualnoteend = locnote.find('-->', idend)
            actualnote = locnote[actualnotestart + 1:actualnoteend].strip()
            if dtd_unit.getid() != entity:
                continue
            if actualnote.startswith("DONT_TRANSLATE"):
                # Take out the entity, its definition and the
                # DONT_TRANSLATE comment.  The list changed size, so stop
                # iterating right away.
                dtd_unit.setid("")
                dtd_unit.source = ""
                del dtd_unit.comments[commentnum]
                break
            # Turn it into an automatic comment for convertcomments.
            dtd_unit.comments[commentnum] = ("automaticcomment", actualnote)
        self.convertcomments(dtd_unit, po_unit)
        self.convertstrings(dtd_unit, po_unit)
        if po_unit.isblank() and not po_unit.getlocations():
            return None
        return po_unit

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """Convert a label/accesskey pair into one PO unit.

        If only one of the two converts to a unit, that unit is returned on
        its own; if neither does, None is returned.  Otherwise the result
        of the mixer's ``mix_units`` is returned.
        """
        label_unit = self.convertunit(labeldtd)
        accesskey_unit = self.convertunit(accesskeydtd)
        if label_unit is None:
            return accesskey_unit
        if accesskey_unit is None:
            return label_unit
        target_unit = po.pounit(encoding="UTF-8")
        return self.mixer.mix_units(label_unit, accesskey_unit, target_unit)

    def convertdtdunit(self, store, unit, mixbucket="dtd"):
        """Converts a unit from store to a po unit, keeping track of mixed
        entities along the way.

        ``mixbucket`` can be specified to indicate if the given unit is part of
        the template or the translated file.

        Returns None for an entity that was already emitted as part of a
        successfully mixed unit in this bucket.
        """
        entity = unit.getid()
        if entity not in self.mixedentities:
            return self.convertunit(unit)

        alreadymixed = self.mixedentities[entity].get(mixbucket, None)
        if alreadymixed:
            # The partner already produced the combined unit; drop this one.
            return None
        elif alreadymixed is False:
            # The mix failed before, so convert the unit on its own.
            return self.convertunit(unit)

        # First time we see this pair in this bucket: try to mix it.
        labelentity, accesskeyentity = self.mixer.find_mixed_pair(
            self.mixedentities, store, unit)
        labeldtd = store.id_index.get(labelentity, None)
        accesskeydtd = store.id_index.get(accesskeyentity, None)
        po_unit = self.convertmixedunit(labeldtd, accesskeydtd)
        mixed = po_unit is not None
        # Remember the outcome for both halves of the pair.
        for name in (accesskeyentity, labelentity):
            if name is not None:
                self.mixedentities[name][mixbucket] = mixed
        if mixed:
            return po_unit
        return self.convertunit(unit)

    def convertstore(self, dtd_store):
        """Convert every translatable unit of ``dtd_store`` into a new PO
        store and return it."""
        target_store = po.pofile()
        targetheader = target_store.init_headers(
            x_accelerator_marker="&",
            x_merge_on="location",
        )
        targetheader.addnote("extracted from %s" % dtd_store.filename,
                             "developer")

        dtd_store.makeindex()
        self.mixedentities = self.mixer.match_entities(dtd_store.id_index)
        for dtd_unit in dtd_store.units:
            if not dtd_unit.istranslatable():
                continue
            po_unit = self.convertdtdunit(dtd_store, dtd_unit)
            if po_unit is not None:
                target_store.addunit(po_unit)
        target_store.removeduplicates(self.duplicatestyle)
        return target_store

    def mergestore(self, origdtdfile, translateddtdfile):
        """Build a PO store whose sources come from ``origdtdfile`` and
        whose targets come from the matching entities of
        ``translateddtdfile`` (unless ``blankmsgstr`` is set)."""
        target_store = po.pofile()
        targetheader = target_store.init_headers(
            x_accelerator_marker="&",
            x_merge_on="location",
        )
        targetheader.addnote(
            "extracted from %s, %s" %
            (origdtdfile.filename, translateddtdfile.filename), "developer")

        origdtdfile.makeindex()
        # TODO: the result of this first call is overwritten just below;
        # it is kept only in case match_entities has side effects.
        self.mixedentities = self.mixer.match_entities(origdtdfile.id_index)
        translateddtdfile.makeindex()
        self.mixedentities = self.mixer.match_entities(
            translateddtdfile.id_index)
        for origdtd in origdtdfile.units:
            if not origdtd.istranslatable():
                continue
            origpo = self.convertdtdunit(origdtdfile,
                                         origdtd,
                                         mixbucket="orig")
            orig_entity = origdtd.getid()
            # Decide which bucket the translated unit is converted under.
            # This runs even when origpo is None because it may prune
            # self.mixedentities.
            if orig_entity in self.mixedentities:
                mixedentitydict = self.mixedentities[orig_entity]
                if "orig" not in mixedentitydict:
                    # Mixed in the translation but not in the original:
                    # treat as unmixed.
                    mixbucket = "orig"
                    del self.mixedentities[orig_entity]
                elif mixedentitydict["orig"]:
                    # The original entity was mixed successfully.
                    mixbucket = "translate"
                else:
                    # Mixing the original failed.
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # A mixed entity (with accesskey) that was already handled.
                continue
            translatedpo = None
            if orig_entity in translateddtdfile.id_index:
                translateddtd = translateddtdfile.id_index[orig_entity]
                translatedpo = self.convertdtdunit(translateddtdfile,
                                                   translateddtd,
                                                   mixbucket=mixbucket)
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            target_store.addunit(origpo)
        target_store.removeduplicates(self.duplicatestyle)
        return target_store