Ejemplo n.º 1
0
def extract_countries_po():
    """Generate a ``countries.po`` catalogue for every locale directory.

    Each sub-directory of ``LOCALE_DIR`` is treated as a locale. For every
    feature of the countries shapefile an entry keyed by the ``name``
    property is added; the translation comes from the
    ``name_<mapped locale>`` property and falls back to ``name`` itself,
    with a warning. Extra entries are added for the ``formal_en`` name and
    for the "USA" / "UK" short forms. The catalogue is saved to
    ``LOCALE_OUT_DIR/<locale>/countries.po``.

    Raises:
        ValueError: if a feature has neither the localized nor the plain
            ``name`` property.
    """
    input_path = get_shape_path("ne_50m_admin_0_countries")

    for locale in os.listdir(LOCALE_DIR):
        locale_dir = path.join(LOCALE_DIR, locale)
        locale_out_dir = path.join(LOCALE_OUT_DIR, locale)

        # Anything that is not a directory is not a locale.
        if not os.path.isdir(locale_dir):
            continue

        with fiona.open(input_path) as source:
            po = POFile(encoding='UTF-8')
            po.metadata = {"Content-Type": "text/plain; charset=utf-8"}
            output_path = path.join(locale_out_dir, "countries.po")

            if not path.exists(locale_out_dir):
                os.makedirs(locale_out_dir)

            # A single parenthesised argument prints identically on
            # Python 2 and Python 3, unlike the bare print statement.
            print("Generating {}/countries.po".format(locale))

            # The probe keys depend only on the locale: compute them once.
            name_key = "name_" + map_locale(locale)
            name_fallback = "name"

            for feat in source:
                props = lower_dict_keys(feat["properties"])

                country_name = props.get("name")
                formal_country_name = props.get("formal_en", country_name)

                translated_name = props.get(name_key)
                if translated_name is None:
                    translated_name = props.get(name_fallback)
                    if translated_name is None:
                        raise ValueError(
                            "Cannot find the translation for {}. Probe keys: {}"
                            .format(locale, (name_key, name_fallback)))
                    print(c.orange(u" Missing translation for {}".format(
                        translated_name)))

                po.append(POEntry(msgid=country_name, msgstr=translated_name))

                # Additional record for the formal country name.
                if (formal_country_name is not None
                        and country_name != formal_country_name):
                    po.append(POEntry(msgid=formal_country_name,
                                      msgstr=translated_name))

                # Short-form aliases for the US and the UK.
                iso_code = props.get("iso_a3")
                if iso_code == "USA":
                    po.append(POEntry(msgid="USA", msgstr=translated_name))
                elif iso_code == "GBR":
                    po.append(POEntry(msgid="UK", msgstr=translated_name))

            po.save(output_path)
            print(c.green("Extracted {} countries for {} to {}".format(
                len(po), locale, output_path)))
Ejemplo n.º 2
0
def generate_po_from_tmx(f_path, entries):
    """Build a PO catalogue from TMX entries and save it to ``f_path``.

    Every element of ``entries`` supplies the message id through
    ``getsource()`` and the translation through ``gettarget()``.
    """
    catalogue = POFile()
    for unit in entries:
        source_text = unit.getsource()
        target_text = unit.gettarget()
        catalogue.append(POEntry(msgid=source_text, msgstr=target_text))
    catalogue.save(f_path)
Ejemplo n.º 3
0
    def test_success(self):
        # A template holding one entry is expected to be accepted by testPOT.
        with tempfile.NamedTemporaryFile(suffix=".pot") as handle:
            catalogue = POFile()
            only_entry = POEntry(msgstr="test string")
            catalogue.append(only_entry)
            catalogue.save(handle.name)

            outcome = testPOT(handle.name)
            self.assertTrue(outcome)
Ejemplo n.º 4
0
    async def export_translation(lang):
        """Export the stored translation for ``lang`` as a PO file.

        The first ``translations`` document whose ``lang`` field matches is
        converted to a catalogue, written to ``./static/export.<lang>.po``
        and returned as a ``FileResponse``.

        Raises:
            HTTPException: 404 when no translation document exists for
                ``lang``.
        """
        # NOTE(review): ``lang`` is interpolated into a filesystem path. If
        # it comes straight from the request it should be validated against
        # the known language codes -- confirm at the call site.
        export_path = f'./static/export.{lang}.po'

        # Only the first matching document is ever used, so fetch at most one
        # instead of counting and then loading every match.
        matches = await database['translations'].find(
            {'lang': lang}).to_list(length=1)

        if not matches:
            # The old code wrote an empty catalogue first, which meant the
            # file always existed and this 404 could never be reached.
            raise HTTPException(status_code=404,
                                detail='Translation not found')

        po = POFile()
        po.metadata = {
            'Project-Id-Version': '1.0',
            'Report-Msgid-Bugs-To': '*****@*****.**',
            'POT-Creation-Date': '2020-10-04 21:00+0200',
            'PO-Revision-Date': '2020-10-04 21:00+0200',
            'Last-Translator': 'Support <*****@*****.**>',
            'Language-Team': 'English <*****@*****.**>',
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=utf-8',
            'Content-Transfer-Encoding': '8bit'
        }

        for value in matches[0]['translation']:
            # Each item is a mapping; its first key/value pair is the
            # (msgid, msgstr) pair.
            msgid, msgstr = list(value.items())[0]
            po.append(POEntry(msgid=msgid, msgstr=msgstr))

        po.save(export_path)
        return FileResponse(export_path)
Ejemplo n.º 5
0
def extract_relay_locations_pot(countries):
    """Write the relay locations template with untranslated entries.

    One entry is added per named country and per named city. The entry
    comment carries the upper-cased country code (plus the city code for
    cities). A city entry rejected by the catalogue with ``ValueError`` is
    reported and skipped.
    """
    template = POFile(encoding='utf-8', check_for_duplicates=True)
    template.metadata = {"Content-Type": "text/plain; charset=utf-8"}
    output_path = path.join(LOCALE_OUT_DIR, RELAY_LOCATIONS_POT_FILENAME)

    print("Generating {}".format(output_path))

    for country in countries:
        country_name = country.get("name")
        if country_name is not None:
            template.append(POEntry(msgid=country_name,
                                    msgstr="",
                                    comment=country.get("code").upper()))
            print("{} ({})".format(country_name, country.get("code")))

        cities = country.get("cities")
        if cities is None:
            continue

        for city in cities:
            city_name = city.get("name")
            if city_name is None:
                continue

            location_codes = "{} {}".format(country.get("code").upper(),
                                            city.get("code").upper())
            city_entry = POEntry(msgid=city_name,
                                 msgstr="",
                                 comment=location_codes)

            try:
                template.append(city_entry)
            except ValueError as err:
                print(c.orange("Cannot add an entry: {}".format(err)))

            print("{} ({})".format(city_name, city.get("code")))

    template.save(output_path)
Ejemplo n.º 6
0
def extract_cities_po():
    """Generate a ``cities.po`` catalogue for every locale directory.

    Each sub-directory of ``LOCALE_DIR`` is treated as a locale. Features of
    the populated-places shapefile whose ``pop_max`` is at least
    ``POPULATION_MAX_FILTER`` get an entry keyed by their ``name`` property.
    The translation is looked up under the language key, then the alternate
    locale key, and finally falls back to ``name`` (counted as a miss).
    Per-locale hit/miss counts are printed as a table at the end.

    Raises:
        ValueError: if a feature has none of the probed name properties.
    """
    input_path = get_shape_path("ne_50m_populated_places")
    stats = []

    for locale in os.listdir(LOCALE_DIR):
        locale_dir = path.join(LOCALE_DIR, locale)
        locale_out_dir = path.join(LOCALE_OUT_DIR, locale)

        # Anything that is not a directory is not a locale.
        if not os.path.isdir(locale_dir):
            continue

        po = POFile(encoding='UTF-8')
        po.metadata = {"Content-Type": "text/plain; charset=utf-8"}
        output_path = path.join(locale_out_dir, "cities.po")
        hits = 0
        misses = 0

        if not path.exists(locale_out_dir):
            os.makedirs(locale_out_dir)

        # A single parenthesised argument prints identically on Python 2 and
        # Python 3, unlike the bare print statement.
        print("Generating {}/cities.po".format(locale))

        # The probe keys depend only on the locale: compute them once.
        name_key = "_".join(("name", get_locale_language(locale)))
        name_alt_key = "_".join(("name", convert_locale_ident(locale)))
        name_fallback = "name"

        with fiona.open(input_path) as source:
            for feat in source:
                props = lower_dict_keys(feat["properties"])

                # Skip places below the population threshold.
                if not props["pop_max"] >= POPULATION_MAX_FILTER:
                    continue

                if props.get(name_key) is not None:
                    translated_name = props.get(name_key)
                    hits += 1
                elif props.get(name_alt_key) is not None:
                    translated_name = props.get(name_alt_key)
                    hits += 1
                elif props.get(name_fallback) is not None:
                    translated_name = props.get(name_fallback)
                    print(c.orange(u"  Missing translation for {}".format(
                        translated_name)))
                    misses += 1
                else:
                    raise ValueError(
                        "Cannot find the translation for {}. Probe keys: {}"
                        .format(locale, (name_key, name_alt_key)))

                po.append(POEntry(msgid=props["name"],
                                  msgstr=translated_name))

        po.save(output_path)
        print(c.green("Extracted {} cities to {}".format(
            len(po), output_path)))

        stats.append((locale, hits, misses))

    print_stats_table("Cities translations", stats)
Ejemplo n.º 7
0
    def write_po_files(
        self,
        legal_code,
        language_code,
        english_by_unit_version,
        messages_text,
    ):
        """Write the .po and .mo files for one legal code translation.

        Args:
            legal_code: object providing ``tool`` (with ``unit``,
                ``version`` and ``resource_slug``) and
                ``translation_filename()``.
            language_code: Django language code of the translation.
            english_by_unit_version: mapping from ``"<unit>|<version>"`` to
                a mapping of internal keys to English message text.
            messages_text: mapping of internal keys to translated text.

        Raises:
            KeyError: if the unit/version pair or an internal key has no
                English counterpart.
        """
        tool = legal_code.tool
        po_filename = legal_code.translation_filename()
        transifex_language = map_django_to_transifex_language_code(
            language_code)

        english_messages = english_by_unit_version[
            f"{tool.unit}|{tool.version}"]

        pofile = POFile()
        # The syntax used to wrap messages in a .po file is
        # difficult if you ever want to copy/paste the messages, so
        # if --unwrapped was passed, set a wrap width that will
        # essentially disable wrapping.
        if self.unwrapped:
            pofile.wrapwidth = 999999

        # Use the English message text as the message key
        for internal_key, translation in messages_text.items():
            pofile.append(
                POEntry(
                    msgid=clean_string(english_messages[internal_key]),
                    msgstr=clean_string(translation),
                ))
        # https://www.gnu.org/software/gettext/manual/html_node/Header-Entry.html  # noqa: E501
        pofile.metadata = {
            "Content-Transfer-Encoding": "8bit",
            "Content-Type": "text/plain; charset=utf-8",
            "Language": transifex_language,
            "Language-Django": language_code,
            "Language-Transifex": transifex_language,
            "Language-Team": "https://www.transifex.com/creativecommons/CC/",
            "MIME-Version": "1.0",
            "PO-Revision-Date": NOW,
            # Computed after all entries have been appended.
            "Percent-Translated": pofile.percent_translated(),
            "Project-Id-Version": tool.resource_slug,
        }

        # exist_ok closes the window between a separate isdir() check and
        # the creation, where another process could create the directory.
        os.makedirs(os.path.dirname(po_filename), exist_ok=True)
        # Save mofile ourself. We could call 'compilemessages' but
        # it wants to compile everything, which is both overkill
        # and can fail if the venv or project source is not
        # writable. We know this dir is writable, so just save this
        # pofile and mofile ourselves.
        LOG.debug(f"Writing {po_filename.replace('.po', '')}.(mo|po)")
        save_pofile_as_pofile_and_mofile(pofile, po_filename)
Ejemplo n.º 8
0
    def test_all_failure(self):
        # A template holding these two entries is expected to be rejected by
        # testPOT.
        with tempfile.NamedTemporaryFile(suffix=".pot") as handle:
            catalogue = POFile()
            for text in ("pest string", "past string"):
                catalogue.append(POEntry(msgstr=text))
            catalogue.save(handle.name)

            outcome = testPOT(handle.name)
            self.assertFalse(outcome)
Ejemplo n.º 9
0
def extract_cities_pot():
    """Generate ``cities.po`` for every locale, with source occurrences.

    Each sub-directory of ``LOCALE_DIR`` is treated as a locale. Features of
    the populated-places shapefile whose ``pop_max`` is at least
    ``POPULATION_MAX_FILTER`` get an entry keyed by their ``name`` property;
    the entry records the shapefile basename and the feature id as its
    occurrence. The translation is looked up under the language key, then
    the alternate locale key, and finally falls back to ``name``.

    Raises:
        ValueError: if a feature has none of the probed name properties.
    """
    input_path = get_shape_path("ne_50m_populated_places")
    input_basename = path.basename(input_path)
    # The former top-level ``output_path`` (OUT_DIR/cities.pot) was always
    # overwritten before use, so it has been dropped.

    for locale in os.listdir(LOCALE_DIR):
        locale_dir = path.join(LOCALE_DIR, locale)
        locale_out_dir = path.join(LOCALE_OUT_DIR, locale)

        # Anything that is not a directory is not a locale.
        if not os.path.isdir(locale_dir):
            continue

        pot = POFile(encoding='UTF-8')
        output_path = path.join(locale_out_dir, "cities.po")

        if not path.exists(locale_out_dir):
            os.makedirs(locale_out_dir)

        # A single parenthesised argument prints identically on Python 2 and
        # Python 3, unlike the bare print statement.
        print("Generating {}/cities.po".format(locale))

        # The probe keys depend only on the locale: compute them once.
        name_key = "_".join(("name", get_locale_language(locale)))
        name_alt_key = "_".join(("name", convert_locale_ident(locale)))
        name_fallback = "name"

        with fiona.open(input_path) as source:
            for feat in source:
                props = lower_dict_keys(feat["properties"])

                # Skip places below the population threshold.
                if not props["pop_max"] >= POPULATION_MAX_FILTER:
                    continue

                if props.get(name_key) is not None:
                    translated_name = props.get(name_key)
                elif props.get(name_alt_key) is not None:
                    translated_name = props.get(name_alt_key)
                elif props.get(name_fallback) is not None:
                    translated_name = props.get(name_fallback)
                    print(u"Missing translation for {}".format(
                        translated_name))
                else:
                    raise ValueError(
                        "Cannot find the translation for {}. Probe keys: {}"
                        .format(locale, (name_key, name_alt_key)))

                pot.append(POEntry(msgid=props["name"],
                                   msgstr=translated_name,
                                   occurrences=[(input_basename,
                                                 feat["id"])]))

        pot.save(output_path)
        print("Extracted {} cities to {}".format(len(pot), output_path))
Ejemplo n.º 10
0
def main():
    """Build ``archetypes/en/LC_MESSAGES/django.po`` next to this script.

    Archetype names are fetched from the HSReplay API and guide snippets
    from hearthstonetopdecks.com. Each becomes an untranslated entry whose
    occurrence is the item's URL. Items lacking a name/snippet or a URL are
    skipped, as are guide snippets already present in the catalogue.

    Raises:
        requests.RequestException: if a request times out or returns an
            error status.
    """
    out_dir = os.path.join(os.path.dirname(__file__), "archetypes", "en",
                           "LC_MESSAGES")
    # exist_ok avoids the race of a separate exists() check.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "django.po")

    # Without a timeout requests waits indefinitely on a stalled server.
    request_timeout = 30  # seconds

    po = POFile()
    po.metadata = {
        "Project-Id-Version": "hsreplaynet",
        "Report-Msgid-Bugs-To": "",
        "POT-Creation-Date": datetime.now().isoformat(),
        "Last-Translator": "HearthSim <*****@*****.**>",
        "Language-Team": "English",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain; charset=utf-8",
        "Content-Transfer-Encoding": "8bit",
    }

    r = requests.get("https://api.hsreplay.net/v1/archetypes/",
                     timeout=request_timeout)
    # Fail with the HTTP error rather than while iterating an error payload.
    r.raise_for_status()
    for archetype in r.json():
        name = archetype.get("name", "")
        url = archetype.get("url", "")
        if not name or not url:
            continue

        entry = POEntry(msgid=name,
                        msgstr="",
                        occurrences=[("https://hsreplay.net" + url, "")])
        po.append(entry)

    htd_r = requests.get(
        "http://www.hearthstonetopdecks.com/wp-json/hsreplay/guides",
        timeout=request_timeout)
    htd_r.raise_for_status()
    for archetype_desc in htd_r.json():
        desc = archetype_desc.get("hsreplay_guide_snippet", "")
        url = archetype_desc.get("url", "")

        if not desc or not url:
            continue

        entry = POEntry(msgid=desc, msgstr="", occurrences=[(url, "")])

        if entry in po:
            # upstream duplicate
            continue

        po.append(entry)

    po.save(out_path)
    print(f"Written {out_path}")
Ejemplo n.º 11
0
    def save_po(self, path, json_root_key, lang):
        """
        Write the Gettext PO file "path/lang/json_root_key.po"
        An existing file is merged with a freshly generated POT; otherwise a new
        file is built from the JSON content, marking pre-filled translations fuzzy
        :param path: Root directory of the per-language output tree
        :param json_root_key: JSON key from RcgJsonKeys class
        :param lang: ISO code of the target language, as listed in RcgLanguages
        :return:
        """
        language = next(candidate for candidate in RcgLanguages
                        if candidate.value["iso_code"] == lang)
        target_dir = join(path, lang)
        target_file = join(target_dir, json_root_key.value + ".po")

        if not exists(target_dir):
            makedirs(target_dir)

        if exists(target_file):
            # Refresh the existing catalogue against the current template.
            logging.info("Updating '{}'...".format(target_file))
            catalogue = pofile(target_file)
            catalogue.merge(self.generate_pot(json_root_key))
            catalogue.save(target_file)
            return

        # No catalogue yet: build one from the JSON data.
        logging.info("Creating '{}'...".format(target_file))
        catalogue = POFile(check_for_duplicates=True)
        catalogue.metadata = METADATA_ENTRY

        english_key = RcgLanguages.LANG_ENGLISH.value["key"]
        target_key = language.value["key"]

        for record in self.json_content[json_root_key.value]:
            if record[english_key] == "":
                continue

            po_entry = POEntry(
                msgctxt=record[LANG_KEY],
                msgid=record[english_key],
            )
            if record.get(target_key) is not None:
                # Translations taken from the JSON are flagged for review.
                po_entry.msgstr = record[target_key]
                po_entry.flags.append("fuzzy")

            try:
                catalogue.append(po_entry)
            except ValueError:
                logging.debug(
                    "Entry {} already exists, skipping...".format(
                        record[LANG_KEY]))

        catalogue.save(target_file)
Ejemplo n.º 12
0
def msgcat(*po_files, **kwargs):
    """Concatenate input po_files together, with later files overwriting earlier ones

    Entries are matched by ``msgid``: a match has its ``msgstr`` replaced,
    anything else is appended.

    :param po_files: catalogues to merge, in order of increasing precedence
    :param base_po: optional keyword; catalogue to merge into (modified in
        place). A new empty ``POFile`` is used when omitted or ``None``.
    :return: the merged catalogue
    """
    # Create the default lazily so it is not built when base_po is supplied.
    base_po = kwargs.get('base_po')
    if base_po is None:
        base_po = POFile()

    for po_file in po_files:
        current_app.logger.debug("Combining PO file with %d strings", len(po_file))
        for entry in po_file:
            # find() scans the catalogue, so look each msgid up only once.
            existing = base_po.find(entry.msgid)
            if existing is not None:
                existing.msgstr = entry.msgstr
            else:
                base_po.append(entry)

    current_app.logger.debug("New PO file string count: %d", len(base_po))
    return base_po
Ejemplo n.º 13
0
def extract_countries_po():
  """Generate a ``countries.po`` catalogue for every locale directory.

  Each sub-directory of ``LOCALE_DIR`` is treated as a locale. Every feature
  of the countries shapefile gets an entry keyed by its ``name`` property.
  The translation is looked up under the language key, then the alternate
  locale key, and finally falls back to ``name`` with a warning.

  Raises:
    ValueError: if a feature has none of the probed name properties.
  """
  input_path = get_shape_path("ne_50m_admin_0_countries")

  for locale in os.listdir(LOCALE_DIR):
    locale_dir = path.join(LOCALE_DIR, locale)
    locale_out_dir = path.join(LOCALE_OUT_DIR, locale)

    # Anything that is not a directory is not a locale.
    if not os.path.isdir(locale_dir):
      continue

    with fiona.open(input_path) as source:
      po = POFile(encoding='UTF-8')
      po.metadata = {"Content-Type": "text/plain; charset=utf-8"}
      output_path = path.join(locale_out_dir, "countries.po")

      if not path.exists(locale_out_dir):
        os.makedirs(locale_out_dir)

      # A single parenthesised argument prints identically on Python 2 and
      # Python 3, unlike the bare print statement.
      print("Generating {}/countries.po".format(locale))

      # The probe keys depend only on the locale: compute them once.
      name_key = "_".join(("name", get_locale_language(locale)))
      name_alt_key = "_".join(("name", convert_locale_ident(locale)))
      name_fallback = "name"

      for feat in source:
        props = lower_dict_keys(feat["properties"])

        if props.get(name_key) is not None:
          translated_name = props.get(name_key)
        elif props.get(name_alt_key) is not None:
          translated_name = props.get(name_alt_key)
        elif props.get(name_fallback) is not None:
          translated_name = props.get(name_fallback)
          print(c.orange(u" Missing translation for {}".format(translated_name)))
        else:
          raise ValueError(
            "Cannot find the translation for {}. Probe keys: {}"
            .format(locale, (name_key, name_alt_key))
            )

        po.append(POEntry(msgid=props["name"], msgstr=translated_name))

      po.save(output_path)
      print(c.green("Extracted {} countries for {} to {}".format(len(po), locale, output_path)))
Ejemplo n.º 14
0
    def __init__(self, locale, version):
        """Create the POT catalogue and load translations per language.

        ``locale`` is a ``+``-separated list of language codes. For each
        language with no findable ``content`` catalogue under ``./locale``,
        ``content.mo`` is compiled from ``content.po`` when that file
        exists; otherwise a notice is printed. Translations are then loaded
        with ``fallback=True``.

        :param locale: ``+``-separated language codes
        :param version: value stored as ``Project-Id-Version``
        """
        # NOTE(review): polib documents this keyword as
        # ``check_for_duplicates``. As spelled here it is presumably ignored,
        # so duplicate checking is likely off. Renaming it would make
        # ``append`` raise ValueError on duplicates -- confirm that callers
        # handle that before changing it.
        self.pot = POFile(check_for_duplicate=True)
        self.pot.metadata = {
            'Project-Id-Version': version,
            'POT-Creation-Date': str(datetime.now()),
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=utf-8',
        }

        locale_dir = Path('.') / 'locale'
        locale_dir.mkdir(exist_ok=True)
        self.languages = locale.split('+')
        for lang in self.languages:
            if gettext.find('content',
                            localedir=locale_dir,
                            languages=[lang]) is not None:
                continue

            messages_dir = locale_dir / lang / 'LC_MESSAGES'
            mo_path = messages_dir / 'content.mo'
            po_path = messages_dir / 'content.po'
            if po_path.exists():
                print(
                    f'The file "{str(mo_path)}" not found.\nUsing "{str(po_path)}" instead.'
                )
                pofile(po_path).save_as_mofile(mo_path)
                print(f'"{str(mo_path)}" generated.')
            else:
                print(
                    f'The file "{str(mo_path)}" or "{str(po_path)}" not found.'
                )
                print(f'Using the original Lean files for "{lang}".\n')

        # Built once, after every missing .mo had a chance to be compiled.
        # The old code rebuilt this list on each iteration and only the
        # last result was kept.
        self.translations = [
            gettext.translation('content',
                                localedir=locale_dir,
                                languages=[lang],
                                fallback=True) for lang in self.languages
        ]

        self.original_texts = []
        self.translated_texts = [[] for lang in self.languages]

        self.occ = None
Ejemplo n.º 15
0
def extract_relay_locations_pot(countries):
  """Write ``relay-locations.pot`` with one untranslated entry per city.

  Every named city of every country gets an entry whose comment holds the
  upper-cased country and city codes. Countries without a ``cities`` value
  and cities without a name are skipped.
  """
  template = POFile(encoding='UTF-8')
  template.metadata = {"Content-Type": "text/plain; charset=utf-8"}
  output_path = path.join(LOCALE_OUT_DIR, "relay-locations.pot")

  print("Generating relay-locations.pot")

  for country in countries:
    cities = country.get("cities")
    if cities is None:
      continue

    for city in cities:
      city_name = city.get("name")
      if city_name is None:
        continue

      location_codes = u"{} {}".format(country.get("code").upper(), city.get("code").upper())
      template.append(POEntry(msgid=city_name, msgstr=u"", comment=location_codes))
      # The line is encoded to UTF-8 bytes before printing, as before.
      print(u"  {} ({})".format(city["name"], city["code"]).encode('utf-8'))

  template.save(output_path)
def main():
    """Build ``compositions/en/LC_MESSAGES/django.po`` next to this script.

    Composition names are fetched from the HSReplay API and each distinct
    name becomes an untranslated entry. Items without a name and names
    already present in the catalogue are skipped.

    Raises:
        requests.RequestException: if the request times out or returns an
            error status.
    """
    out_dir = os.path.join(os.path.dirname(__file__), "compositions", "en",
                           "LC_MESSAGES")
    # exist_ok avoids the race of a separate exists() check.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "django.po")

    # Without a timeout requests waits indefinitely on a stalled server.
    request_timeout = 30  # seconds

    po = POFile()
    po.metadata = {
        "Project-Id-Version": "hsreplaynet",
        "Report-Msgid-Bugs-To": "",
        "POT-Creation-Date": datetime.now().isoformat(),
        "Last-Translator": "HearthSim <*****@*****.**>",
        "Language-Team": "English",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain; charset=utf-8",
        "Content-Transfer-Encoding": "8bit",
    }

    r = requests.get("https://hsreplay.net/api/v1/compositions/",
                     timeout=request_timeout)
    # Fail with the HTTP error rather than while iterating an error payload.
    r.raise_for_status()
    for composition in r.json():
        name = composition.get("name", "")
        if not name:
            continue

        entry = POEntry(
            msgid=name,
            msgstr="",
        )

        if entry in po:
            # duplicate
            continue

        po.append(entry)

    po.save(out_path)
    print(f"Written {out_path}")
Ejemplo n.º 17
0
    def generate_pot(self, json_root_key):
        """
        Build a POT file object from the JSON entries stored under one root key
        Entries with an empty English text are left out; entries the catalogue
        rejects with ValueError are logged and skipped
        :param json_root_key: JSON key from RcgJsonKeys class
        :return: POT file object
        """
        template = POFile(check_for_duplicates=True)
        template.metadata = METADATA_ENTRY
        template.metadata_is_fuzzy = 1

        english_key = RcgLanguages.LANG_ENGLISH.value["key"]

        for record in self.json_content[json_root_key.value]:
            if record[english_key] == "":
                continue

            candidate = POEntry(
                msgctxt=record[LANG_KEY],
                msgid=record[english_key],
            )
            try:
                template.append(candidate)
            except ValueError:
                logging.debug(
                    "Entry {} already exists, skipping...".format(
                        record[LANG_KEY]))

        return template
Ejemplo n.º 18
0
from littleutils import group_by_key, file_to_json
from polib import POEntry, POFile

from core import linting
from core import translation as t
from core.text import pages, get_predictions, get_special_messages, load_chapters
from core.utils import markdown_codes

# Directory that contains this module.
this_dir = Path(__file__).parent
# Path to ../frontend/src, relative to this module's directory.
frontend_src = this_dir / "../frontend/src"

# Module-level state. NOTE(review): nothing in view writes to these;
# presumably they are filled in while pages are processed -- confirm against
# the rest of the file.
code_blocks = defaultdict(dict)
code_bits = defaultdict(set)
page_link = ""

# Shared catalogue that entry() appends to, created with wrapwidth=120.
po = POFile(wrapwidth=120)
po.metadata = {
    'MIME-Version': '1.0',
    'Content-Type': 'text/plain; charset=utf-8',
    'Content-Transfer-Encoding': '8bit',
}


def entry(msgid, msgstr, comment=""):
    """Append one message, with an optional comment, to the module-level ``po``."""
    record = POEntry(msgid=msgid, msgstr=msgstr, comment=comment)
    po.append(record)

Ejemplo n.º 19
0
def translate_single_relay_locations(country_translator, city_translator, countries, locale):
  """
  A helper function to generate the relay-locations.po for the given locale.

  The `countries` argument is an array value that's contained within the "countries" key of the
  relay location list.

  Countries are translated by code and cities by name; a missing translation is stored as an
  empty string and counted as a miss. For US cities only the part before the last comma is
  translated and the state suffix is appended back.

  Returns a `(hits, misses)` tuple counting found and missing translations.

  Raises ValueError if a city record has no name.
  """

  po = POFile(encoding='utf-8', check_for_duplicates=True)
  po.metadata = {"Content-Type": "text/plain; charset=utf-8"}
  locale_out_dir = path.join(LOCALE_OUT_DIR, locale)
  output_path = path.join(locale_out_dir, RELAY_LOCATIONS_PO_FILENAME)

  hits = 0
  misses = 0

  if not path.exists(locale_out_dir):
    os.makedirs(locale_out_dir)

  for country in countries:
    country_name = country.get("name")
    country_code = country.get("code")

    translated_country_name = country_translator.translate(locale, country_code)
    # Default to empty string if no translation was found
    if translated_country_name is not None:
      hits += 1
      colorize = c.green
    else:
      translated_country_name = ""
      misses += 1
      colorize = c.orange

    print(colorize(
      "{} ({}) -> \"{}\"".format(country_name, country_code, translated_country_name)))

    # translate country
    po.append(POEntry(
      msgid=country_name,
      msgstr=translated_country_name,
      comment=country_code.upper()
    ))

    # translate cities
    cities = country.get("cities")
    if cities is None:
      print(c.orange("Skip {} ({}) because no cities were found."
        .format(country_name, country_code)))
      continue

    for city in cities:
      city_name = city.get("name")
      city_code = city.get("code")
      if city_name is None:
        raise ValueError("Missing the name field in city record.")

      # Make sure to append the US state back to the translated name of the city
      if country_code == "us":
        # Split off only the last comma-separated part. The previous maxsplit of 2 produced
        # three parts for names with two commas, and the middle part was then used as the state.
        split = city_name.rsplit(",", 1)
        translated_name = city_translator.translate(locale, split[0].strip())

        if translated_name is not None and len(split) > 1:
          translated_name = "{}, {}".format(translated_name, split[1].strip())
      else:
        translated_name = city_translator.translate(locale, city_name)

      # Default to empty string if no translation was found
      if translated_name is not None:
        hits += 1
        colorize = c.green
      else:
        translated_name = ""
        misses += 1
        colorize = c.orange

      print(colorize("{} ({}) -> \"{}\"".format(city_name, city_code, translated_name)))

      entry = POEntry(
        msgid=city_name,
        msgstr=translated_name,
        comment="{} {}".format(country_code.upper(), city_code.upper())
      )

      try:
        po.append(entry)
      except ValueError as err:
        print(c.orange("Cannot add an entry: {}".format(err)))

  po.save(output_path)

  return (hits, misses)
Ejemplo n.º 20
0
def extract_cities_po():
    """Write a cities PO catalogue for each locale directory under LOCALE_DIR.

    Every feature of the ``ne_50m_populated_places`` shape whose ``pop_max``
    reaches POPULATION_MAX_FILTER produces one entry: the msgid is the
    ``name`` property and the msgstr is the ``name_<mapped locale>`` property,
    falling back to ``name`` (counted as a miss) when the localized one is
    absent. The per-locale hit/miss counts are passed to
    ``print_stats_table`` once all locales are processed.

    Raises:
        ValueError: if a feature has neither the localized nor the plain
            ``name`` property.
    """
    shape_path = get_shape_path("ne_50m_populated_places")
    stats = []

    for locale in os.listdir(LOCALE_DIR):
        locale_dir = path.join(LOCALE_DIR, locale)
        locale_out_dir = path.join(LOCALE_OUT_DIR, locale)

        # Only sub-directories of LOCALE_DIR are treated as locales.
        if not os.path.isdir(locale_dir):
            continue

        catalogue = POFile(encoding='utf-8', check_for_duplicates=True)
        catalogue.metadata = {"Content-Type": "text/plain; charset=utf-8"}
        output_path = path.join(locale_out_dir, CITIES_PO_FILENAME)
        hits = misses = 0

        if not path.exists(locale_out_dir):
            os.makedirs(locale_out_dir)

        print("Generating {}".format(output_path))

        with fiona.open(shape_path) as source:
            for feature in source:
                props = lower_dict_keys(feature["properties"])

                # Places below the population threshold are left out.
                if not (props["pop_max"] >= POPULATION_MAX_FILTER):
                    continue

                name_key = "name_" + map_locale(locale)
                name_fallback = "name"

                localized = props.get(name_key)
                if localized is not None:
                    translated_name = localized
                    hits += 1
                else:
                    translated_name = props.get(name_fallback)
                    if translated_name is None:
                        raise ValueError(
                            "Cannot find the translation for {}. Probe keys: {}"
                            .format(locale, (name_key, name_fallback)))
                    print(c.orange(
                        "Missing translation for {}".format(translated_name)))
                    misses += 1

                entry = POEntry(msgid=props.get("name"),
                                msgstr=translated_name)

                try:
                    catalogue.append(entry)
                except ValueError as err:
                    # Presumably a duplicate msgid (the catalogue is created
                    # with check_for_duplicates=True); report and carry on.
                    print(c.orange("Cannot add an entry: {}".format(err)))

        sort_pofile_entries(catalogue)
        catalogue.save(output_path)
        print(c.green("Extracted {} cities to {}".format(
            len(catalogue), output_path)))

        stats.append((locale, hits, misses))

    print_stats_table("Cities translations", stats)
Ejemplo n.º 21
0
    def handle(self, input_directory, **options):
        """Import license HTML files and write gettext files for them.

        Scans ``input_directory`` for ``by*`` and ``zero_1.0*`` ``.html``
        files, finds or creates a ``License`` and a ``LegalCode`` row for each
        file that passes the filters, then parses the HTML of every selected
        legal code. For versions other than 3.0 the extracted messages are
        written out through ``save_pofile_as_pofile_and_mofile``; ported 3.0
        licenses only get their HTML stored on the ``LegalCode``.

        Options read from ``options``:
            versions: comma-separated versions to include, or falsy for all.
            languages: comma-separated language codes to include, or falsy
                for all; ``"en"`` is always added to the set.
            unwrapped: when truthy, the PO wrap width is set to 999999.

        Raises:
            ValueError: if a file's language code does not map to an entry of
                ``settings.LANG_INFO``, or if no English legal code was
                selected (``list.remove("en")``).
            NotImplementedError: for license codes/versions that are neither
                BY nor ``"CC0"``.
        """
        if options["versions"]:
            versions_to_include = options["versions"].split(",")
        else:
            versions_to_include = None
        if options["languages"]:
            languages_to_include = set(["en"]) | set(options["languages"].split(","))
        else:
            languages_to_include = None
        self.unwrapped = options["unwrapped"]

        licenses_created = 0
        legalcodes_created = 0
        legalcodes_to_import = []

        # Get list of html filenames for CC0 and any BY license (any version).
        # We'll filter out the filenames for unwanted versions later.
        html_filenames = sorted(
            [
                f
                for f in os.listdir(input_directory)
                if (f.startswith("by") or f.startswith("zero_1.0"))
                and f.endswith(".html")
            ]
        )
        for filename in html_filenames:
            # print(filename)
            metadata = parse_legalcode_filename(filename)

            basename = os.path.splitext(filename)[0]
            fullpath = os.path.join(input_directory, filename)

            license_code = metadata["license_code"]
            version = metadata["version"]
            jurisdiction_code = metadata["jurisdiction_code"]
            # Fall back to the jurisdiction's default language when the
            # filename metadata carries no language code.
            cc_language_code = metadata[
                "cc_language_code"
            ] or get_default_language_for_jurisdiction(jurisdiction_code)
            # Make sure this is a valid language code (one we know about)
            django_language_code = cc_to_django_language_code(cc_language_code)
            if django_language_code not in settings.LANG_INFO:
                raise ValueError(f"Invalid language_code={cc_language_code}")

            # Just CC0, BY 3.0, & 4.0, and apply any command line options
            include = (
                (
                    (license_code in BY_LICENSE_CODES and version in {"3.0", "4.0"})
                    or license_code in CC0_LICENSE_CODES
                )
                and (versions_to_include is None or version in versions_to_include)
                and (
                    languages_to_include is None
                    or cc_language_code in languages_to_include
                )
            )
            if not include:
                continue

            about_url = metadata["about_url"]

            # These are valid for BY only
            # The flags are derived from the dash-separated parts of the
            # license code ("nd", "sa", "nc").
            license_code_parts = license_code.split("-")
            if "by" in license_code_parts:
                permits_derivative_works = "nd" not in license_code_parts
                permits_reproduction = "nd" not in license_code_parts
                permits_distribution = "nd" not in license_code_parts
                permits_sharing = "nd" not in license_code_parts
                requires_share_alike = "sa" in license_code_parts
                requires_notice = True
                requires_attribution = True
                requires_source_code = False  # GPL, LGPL only, I think
                prohibits_commercial_use = "nc" in license_code_parts
                prohibits_high_income_nation_use = False  # Not any BY 4.0 license
            elif license_code == "CC0":
                # permits anything, requires nothing, prohibits nothing
                permits_derivative_works = True
                permits_reproduction = True
                permits_distribution = True
                permits_sharing = True
                requires_share_alike = False
                requires_notice = False
                requires_attribution = False
                requires_source_code = False
                prohibits_commercial_use = False
                prohibits_high_income_nation_use = False
            else:
                raise NotImplementedError(basename)

            # Find or create a License object
            # The license is looked up by its "about" URL only; the other
            # fields are applied just when a new row is created.
            license, created = License.objects.get_or_create(
                about=about_url,
                defaults=dict(
                    license_code=license_code,
                    version=version,
                    jurisdiction_code=jurisdiction_code,
                    permits_derivative_works=permits_derivative_works,
                    permits_reproduction=permits_reproduction,
                    permits_distribution=permits_distribution,
                    permits_sharing=permits_sharing,
                    requires_share_alike=requires_share_alike,
                    requires_notice=requires_notice,
                    requires_attribution=requires_attribution,
                    requires_source_code=requires_source_code,
                    prohibits_commercial_use=prohibits_commercial_use,
                    prohibits_high_income_nation_use=prohibits_high_income_nation_use,
                ),
            )
            if created:
                licenses_created += 1
            # Find or create a LegalCode object
            legalcode, created = LegalCode.objects.get_or_create(
                license=license,
                language_code=cc_language_code,
                defaults=dict(
                    html_file=fullpath,
                ),
            )

            if created:
                legalcodes_created += 1
            legalcodes_to_import.append(legalcode)
        # print(
        #     f"Created {licenses_created} licenses and {legalcodes_created} translation objects"
        # )

        # NOW parse the HTML and output message files
        # Re-query the collected objects as a queryset so they can be
        # filtered by language and ordered below.
        legalcodes_to_import = LegalCode.objects.filter(
            pk__in=[lc.pk for lc in legalcodes_to_import]
        )

        # What are the language codes we have HTML files for?
        cc_language_codes = sorted(set(lc.language_code for lc in legalcodes_to_import))

        # Maps "<license_code>|<version>" to the English messages parsed for
        # that license; other languages use them as msgids.
        english_by_license_code_version = {}

        # We have to do English first. Django gets confused if you try to load
        # another language and it can't find English, I guess it's looking for
        # something to fall back to.
        cc_language_codes.remove(
            "en"
        )  # If english isn't in this list, something is wrong
        for cc_language_code in ["en"] + cc_language_codes:
            for legalcode in legalcodes_to_import.filter(
                language_code=cc_language_code,
            ).order_by(
                "-license__version",
                "license__license_code",
                "license__jurisdiction_code",
            ):
                license = legalcode.license
                license_code = license.license_code
                version = license.version
                # print(
                #     f"Importing {legalcode.html_file} {license_code} lang={cc_language_code}"
                # )
                with open(legalcode.html_file, "r", encoding="utf-8") as f:
                    content = f.read()

                if version == "4.0":
                    messages_text = self.import_by_40_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                elif version == "3.0":
                    if license.jurisdiction_code:
                        # Ported license: we just save the HTML for now
                        legalcode.html = self.import_by_30_ported_license_html(
                            content=content,
                            legalcode=legalcode,
                        )
                        legalcode.save()
                        # No message file is produced for ported licenses.
                        continue
                    else:
                        # Unported license: we parse out the messages like 4.0
                        messages_text = self.import_by_30_unported_license_html(
                            content=content,
                            legalcode=legalcode,
                        )
                elif license_code == "CC0":
                    messages_text = self.import_cc0_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                else:
                    raise NotImplementedError(
                        f"Have not implemented parsing for {license_code} {version} licenses."
                    )

                # NOTE(review): for unported 3.0 the messages parsed above are
                # not written anywhere, because of this version check.
                if version != "3.0":
                    # 3.0 doesn't have any translation files - might be the same for other versions
                    key = f"{license_code}|{version}"
                    if cc_language_code == "en":
                        english_by_license_code_version[key] = messages_text
                    # Raises KeyError when no English legal code was processed
                    # for this license code and version.
                    english_messages = english_by_license_code_version[key]

                    pofile = POFile()
                    # The syntax used to wrap messages in a .po file is difficult if you ever
                    # want to copy/paste the messages, so if --unwrapped was passed, set a
                    # wrap width that will essentially disable wrapping.
                    if self.unwrapped:
                        pofile.wrapwidth = 999999
                    pofile.metadata = {
                        "Project-Id-Version": f"{license_code}-{version}",
                        # 'Report-Msgid-Bugs-To': '*****@*****.**',
                        # 'POT-Creation-Date': '2007-10-18 14:00+0100',
                        # 'PO-Revision-Date': '2007-10-18 14:00+0100',
                        # 'Last-Translator': 'you <*****@*****.**>',
                        # 'Language-Team': 'English <*****@*****.**>',
                        "Language": cc_language_code,
                        "MIME-Version": "1.0",
                        "Content-Type": "text/plain; charset=utf-8",
                        "Content-Transfer-Encoding": "8bit",
                    }

                    # Use the English message text as the message key
                    # English entries get an empty msgstr; other languages use
                    # the English text as msgid and their own text as msgstr.
                    for internal_key, translation in messages_text.items():
                        if cc_language_code == "en":
                            message_key = translation.strip()
                            message_value = ""
                        else:
                            # WORKAROUND - by-nc-nd 4.0 NL has an extra item under s3a.
                            # https://github.com/creativecommons/creativecommons.org/pull/1160
                            if (
                                internal_key == "s3a4_if_you_share_adapted_material"
                                and internal_key not in english_messages
                            ):
                                message_key = (
                                    "If You Share Adapted Material You produce, the Adapter's "
                                    "License You apply must not prevent recipients of the Adapted "
                                    "Material from complying with this Public License."
                                )
                            else:
                                message_key = english_messages[internal_key]
                            message_value = translation

                        pofile.append(
                            POEntry(
                                msgid=clean_string(message_key),
                                msgstr=clean_string(message_value),
                            )
                        )

                    po_filename = legalcode.translation_filename()
                    # NOTE(review): `dir` shadows the builtin of the same name
                    # for the rest of this method.
                    dir = os.path.dirname(po_filename)
                    if not os.path.isdir(dir):
                        os.makedirs(dir)
                    # Save mofile ourself. We could call 'compilemessages' but it wants to
                    # compile everything, which is both overkill and can fail if the venv
                    # or project source is not writable. We know this dir is writable, so
                    # just save this pofile and mofile ourselves.
                    save_pofile_as_pofile_and_mofile(pofile, po_filename)
Ejemplo n.º 22
0
def translate_relay_locations(place_translator, countries, locale):
  """
  Build ``relay-locations.po`` for *locale* from the relay city names.

  Each city of each country becomes one PO entry whose msgid is the city name,
  whose msgstr is the result of ``place_translator.translate`` (an empty
  string when it returns ``None``) and whose comment is the upper-cased
  country and city codes. Countries without a ``cities`` value are skipped.

  Returns a ``(hits, misses)`` tuple counting translated and untranslated
  cities. Raises ``ValueError`` when a city record has no ``name``.
  """
  locale_out_dir = path.join(LOCALE_OUT_DIR, locale)
  output_path = path.join(locale_out_dir, "relay-locations.po")

  po = POFile(encoding='UTF-8')
  po.metadata = {"Content-Type": "text/plain; charset=utf-8"}

  hits = 0
  misses = 0

  if not path.exists(locale_out_dir):
    os.makedirs(locale_out_dir)

  # NOTE: single-argument print(...) behaves the same whether it is parsed as
  # a Python 2 print statement or as a Python 3 function call.
  for country in countries:
    country_name = country.get("name")
    country_code = country.get("code")
    cities = country.get("cities")

    if cities is None:
      print(c.orange(u"Skip {} ({}) because no cities were found.".format(
        country_name, country_code)))
      continue

    for city in cities:
      city_name = city.get("name")
      city_code = city.get("code")
      if city_name is None:
        raise ValueError("Missing the name field in city record.")

      if country_code != "us":
        translated_name = place_translator.translate(locale, city_name)
      else:
        # US city names carry a state suffix: translate the part before the
        # comma and append the following part back to the translated name.
        parts = city_name.rsplit(",", 2)
        translated_name = place_translator.translate(locale, parts[0].strip())

        if len(parts) > 1 and translated_name is not None:
          translated_name = u"{}, {}".format(translated_name, parts[1].strip())

      # An untranslated city is stored with an empty msgstr.
      found_translation = translated_name is not None
      if not found_translation:
        translated_name = ""
        misses += 1
      else:
        hits += 1

      log_message = u"  {} ({}) -> \"{}\"".format(
        city_name, city_code, translated_name).encode('utf-8')
      colorize = c.green if found_translation else c.orange
      print(colorize(log_message))

      entry_comment = u"{} {}".format(country_code.upper(), city_code.upper())
      po.append(POEntry(
        msgid=city_name,
        msgstr=translated_name,
        comment=entry_comment
      ))

  po.save(output_path)

  return (hits, misses)
Ejemplo n.º 23
0
def download_all_translations(state):
    """Download translations from all Smartling projects and combine them.

    For every POT file in ``POT_FILES``, the per-locale PO files of each
    project in ``SMARTLING_PROJECT_IDS`` are downloaded and grouped by locale.
    Each group is merged with ``msgcat`` and saved as ``<basename>.po`` and
    ``<basename>.mo`` under
    ``<app root>/translations/<locale>/LC_MESSAGES``.

    :param state: passed through unchanged to ``download_zip_file``

    Terminates the process via ``sys.exit`` when the destination directory
    cannot be created.
    """
    creds = {'bearer_token': smartling_authenticate()}
    for pot_file_path in POT_FILES:
        dest_po_basename = os.path.basename(pot_file_path).split('.pot')[0]

        # list of per-project PO file objects, keyed by locale code
        po_files_to_merge = defaultdict(list)
        for project_id in current_app.config['SMARTLING_PROJECT_IDS']:
            current_app.logger.debug(
                "Downloading %s.pot translations from project %s",
                dest_po_basename,
                project_id,
            )
            zip_contents = download_zip_file(
                uri=pot_file_path,
                project_id=project_id,
                state=state,
                credentials=creds,
            )
            with io.BytesIO(zip_contents) as zip_fp, ZipFile(zip_fp, "r") as locales_zip:
                for po_locale_code, po in pos_from_zip(locales_zip):
                    po_files_to_merge[po_locale_code].append(po)

        for locale_code, po_files in po_files_to_merge.items():
            current_app.logger.debug(
                "Combining PO files for %s.pot (%s)",
                dest_po_basename,
                locale_code,
            )
            dest_dir = os.path.join(
                current_app.root_path, "translations",
                locale_code, 'LC_MESSAGES',
            )
            dest_po = os.path.join(dest_dir, '{}.po'.format(dest_po_basename))
            dest_mo = os.path.join(dest_dir, '{}.mo'.format(dest_po_basename))

            # disable line-wrapping
            base_po = POFile(wrapwidth=-1)

            # re-use metadata of first PO file
            # todo: set config common for pybabel extract too
            base_po.metadata = po_files[0].metadata
            combined_po = msgcat(*po_files, base_po=base_po)

            # Create directory if necessary
            if not os.path.isdir(dest_dir):
                try:
                    os.makedirs(dest_dir)
                except OSError as e:
                    current_app.logger.error(e)
                    # Report the directory that could not be created, not its
                    # parent: dest_dir is already a directory path.
                    sys.exit("Error in creating directory {}".format(dest_dir))

            combined_po.save(dest_po)
            current_app.logger.info(
                "Saved combined PO file: %s",
                os.path.relpath(dest_po, current_app.root_path),
            )

            combined_po.save_as_mofile(dest_mo)
            current_app.logger.info(
                "Saved combined MO file: %s",
                os.path.relpath(dest_mo, current_app.root_path),
            )
Ejemplo n.º 24
0
def command_main_po(from_language, to_language, input, output=None, po=None, pot=None, pdf=False, undetected_strings=[], fake=None):
    """Translate a document by find-and-replace driven by a gettext catalogue.

    The strings of *input* are extracted with ``odf2xliff``, written to a POT
    file, merged into the PO file with ``msginit``/``msgmerge``, and every
    string is then replaced in the document by its PO translation (or by
    itself when untranslated). A log of all replacements is written to
    ``<output>.log``. A temporary ``original.xlf`` is created in, and removed
    from, the current working directory.

    Args:
        from_language: Source language; only used in the progress message.
        to_language: Target language; used in the default file names.
        input: Path of the document to translate.
        output: Path of the translated document. Defaults to
            ``<input>.<to_language>.<extension of input>``.
        po: Path of the PO file. Defaults to ``<input>.<to_language>.po``.
        pot: Path of the POT file. Defaults to ``<input>.<to_language>.pot``.
        pdf: When exactly ``True``, also export ``<output>.pdf``.
        undetected_strings: Extra strings added to the catalogue on top of the
            extracted ones. Only read, never modified.
        fake: When exactly ``True``, every string is replaced by itself
            wrapped in braces instead of by its translation.
    """
    # Take the real file suffix. Splitting on the first "." picked the wrong
    # piece for inputs such as "./doc.odt" or "report.v2.odt" and raised
    # IndexError for names without any dot.
    extension = path.splitext(input)[1].lstrip(".")

    if output is None:
        if extension:
            output = f"{input}.{to_language}.{extension}"
        else:
            output = f"{input}.{to_language}"
    if po is None:
        po = f"{input}.{to_language}.po"
    if pot is None:
        pot = f"{input}.{to_language}.pot"

    logfile = f"{output}.log"

    # The context manager closes (and flushes) the log even when one of the
    # external tools or a document operation below raises.
    with open(logfile, "w") as log:
        # NOTE(review): these f-strings are interpolated before `_()` receives
        # them, so the catalogue lookup is done on the formatted text.
        header = [
            _(f"Translating '{input}' from '{from_language }' to '{to_language}'"),
            _(f"  - Output: {output}"),
            _(f"  - File catalog pot: {pot}"),
            _(f"  - File catalog po: {po}"),
            _(f"  - Translation log: {logfile}"),
        ]
        for line in header:
            print(line)
            log.write(line + "\n")

        original_xlf = "original.xlf"
        if path.exists(original_xlf):
            remove(original_xlf)

        doc = ODT(input)

        # Creating a list of ordered document strings
        run_check(["odf2xliff", input, original_xlf])
        # Read the <source> elements of the generated XLIFF file
        mytree = ET.parse(original_xlf)
        myroot = mytree.getroot()
        file_ = myroot[0]
        body = file_[0]
        sources = set()
        for e in body:
            if e[0].text is not None:  # <source>
                sources.update(removeTags(innercontent(e[0])))

        sources.update(undetected_strings)
        sources = list(sources)

        if path.exists(original_xlf):
            remove(original_xlf)

        # Creating pot file
        file_pot = POFile()
        file_pot.metadata = {
            'Project-Id-Version': '1.0',
            'Report-Msgid-Bugs-To': '*****@*****.**',
            'POT-Creation-Date': '2007-10-18 14:00+0100',
            'PO-Revision-Date': '2007-10-18 14:00+0100',
            'Last-Translator': 'you <*****@*****.**>',
            'Language-Team': 'English <*****@*****.**>',
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=utf-8',
            'Content-Transfer-Encoding': '8bit',
        }
        for i, source in enumerate(sources):
            file_pot.append(POEntry(
                msgid=source,
                msgstr='',
                occurrences=[('string', str(i)), ],
            ))
        file_pot.save(pot)

        # Merging pot with our po file
        if not path.exists(po):
            run_check(["msginit", "-i", pot, "-o", po])
        run_check(["msgmerge", "-N", "--no-wrap", "-U", po, pot])

        # Creates a dictionary of translations
        dict_po = {}
        file_po = pofile(po)
        for entry in file_po:
            if fake is True:
                dict_po[entry.msgid] = f"{{{entry.msgid}}}"
            elif entry.msgstr == "":
                # Untranslated strings are replaced by themselves
                dict_po[entry.msgid] = entry.msgid
            else:
                dict_po[entry.msgid] = entry.msgstr

        # Converts sources to entries (list of tuples), longest string first
        entries = sorted(
            ((source, dict_po[source]) for source in sources),
            key=lambda item: len(item[0]),
            reverse=True,
        )

        # Creating our translated output
        log.write("\n\n==== TRANSLATION LOGS ====\n")
        warns = ""
        for i, (find, replace) in enumerate(entries):
            number = doc.findall_and_replace(find, replace)
            rs = replaced_entries_before(find, i, entries)
            entry_log = f"""
* Entry {i}

    Original: {find}

    Translation set {number} times: {replace}
"""
            log.write(entry_log)
            if len(rs) > 0:
                warns = warns + entry_log + "    WARNING: This replacement could overwrite before replacements. Perhaps you'll need to overwrite your result later with unogenerator.\n"
                for previous in rs:
                    warns = warns + f"        - '{previous[0]}' ==> '{previous[1]}'\n"

        if warns != "":
            log.write("\n\n==== WARNINGS ====\n")
            log.write(warns)

        doc.save(output)
        if pdf is True:
            doc.export_pdf(output + ".pdf")
        doc.close()
        print(f"{len(sources)} messages found. {len(file_po.translated_entries())} translated. {len(file_po.untranslated_entries())} untranslated.")
Ejemplo n.º 25
0
def command_generate_po(from_language, to_language, input, output_directory, translate,  undetected_strings=[]):
    """Extract the paragraph strings of ODT files into a gettext catalogue.

    Writes ``<output_directory>/catalogue.pot`` with one entry per distinct
    string, in order of first appearance, and then creates or updates
    ``<output_directory>/<to_language>/<to_language>.po`` with
    ``msginit``/``msgmerge``. Each entry's occurrences are
    ``(filename, "Paragraph#<paragraph index>#<position in paragraph>")``
    pairs. Strings equal to ``""``, ``" "`` or ``"  "`` are ignored.

    Args:
        from_language: Not used by this function.
        to_language: Target language; names the PO file and its directory.
        input: Iterable of document file names to read.
        output_directory: Directory receiving the catalogue files; created
            when missing.
        translate: When exactly ``True``, every input document is also saved
            under ``<output_directory>/<to_language>/``.
        undetected_strings: Not used by this function.
    """
    def same_entries_to_ocurrences(l):
        """Convert entry tuples into ``(filename, reference)`` occurrences."""
        ordered = sorted(l, key=lambda x: (x[1], x[2], x[3]))
        return [
            (filename, f"{kind}#{number}#{position}")
            for filename, kind, number, position, _text in ordered
        ]

    makedirs(output_directory, exist_ok=True)
    makedirs(f"{output_directory}/{to_language}", exist_ok=True)

    pot = f"{output_directory}/catalogue.pot"
    po = f"{output_directory}/{to_language}/{to_language}.po"

    # Maps each distinct string to its entries, each entry being
    # (filename, "Paragraph", paragraph index, position in paragraph, text).
    # Grouping while reading avoids rescanning every entry for every string,
    # and iterating the dict keeps the catalogue in first-appearance order
    # instead of the arbitrary order of a set.
    entries_by_text = {}

    # Creating pot file
    print(_("Extracting strings from:"))
    for filename in input:
        print(_(f"   - {filename}"))
        doc = ODT(filename)

        # Extract strings from paragraphs
        enumeration = doc.cursor.Text.createEnumeration()
        for i, par in enumerate(enumeration):
            if par.supportsService("com.sun.star.text.Paragraph"):
                for position, element in enumerate(par.createEnumeration()):
                    text_ = element.getString()
                    if text_ not in ("", " ", "  "):
                        entries_by_text.setdefault(text_, []).append(
                            (filename, "Paragraph", i, position, text_))
        doc.close()

    # TODO: strings in page headers are not extracted yet.

    file_pot = POFile()
    file_pot.metadata = {
        'Project-Id-Version': '1.0',
        'Report-Msgid-Bugs-To': '*****@*****.**',
        'POT-Creation-Date': '2007-10-18 14:00+0100',
        'PO-Revision-Date': '2007-10-18 14:00+0100',
        'Last-Translator': 'you <*****@*****.**>',
        'Language-Team': 'English <*****@*****.**>',
        'MIME-Version': '1.0',
        'Content-Type': 'text/plain; charset=utf-8',
        'Content-Transfer-Encoding': '8bit',
    }
    for text_, same_entries in entries_by_text.items():
        file_pot.append(POEntry(
            msgid=text_,
            msgstr='',
            occurrences=same_entries_to_ocurrences(same_entries),
        ))
    file_pot.save(pot)

    # Merging pot with our po file
    if not path.exists(po):
        run_check(["msginit", "-i", pot, "-o", po])
    run_check(["msgmerge", "-N", "--no-wrap", "-U", po, pot])

    print(f"{len(entries_by_text)} different strings detected")

    if translate is True:
        print(_("Translating files to:"))
        for filename in input:
            # NOTE(review): the document is saved as opened; no replacement
            # is applied to it here.
            doc = ODT(filename)
            output = f"{output_directory}/{to_language}/{path.basename(filename)}"
            print(_(f"   - {output}"))
            doc.save(output)
            doc.close()