def write_po_files(
    self,
    legal_code,
    language_code,
    english_by_unit_version,
    messages_text,
):
    """
    Build a gettext catalog for one legal code and save it as .po and .mo.

    Args:
        legal_code: object providing ``tool`` (with ``unit``, ``version``
            and ``resource_slug``) and ``translation_filename()``.
        language_code: Django language code of the translation; it is
            mapped to the Transifex code for the catalog header.
        english_by_unit_version: mapping keyed by ``"<unit>|<version>"``
            whose values map internal message keys to English text.
        messages_text: mapping of internal message key to translated
            text.

    Raises:
        KeyError: if the unit/version pair, or one of the internal keys
            in ``messages_text``, has no English counterpart.
    """
    tool = legal_code.tool
    unit = tool.unit
    version = tool.version
    po_filename = legal_code.translation_filename()
    transifex_language = map_django_to_transifex_language_code(
        language_code
    )

    key = f"{unit}|{version}"
    english_messages = english_by_unit_version[key]

    pofile = POFile()
    # The syntax used to wrap messages in a .po file is difficult if you
    # ever want to copy/paste the messages, so if --unwrapped was passed,
    # set a wrap width that will essentially disable wrapping.
    if self.unwrapped:
        pofile.wrapwidth = 999999

    # Use the English message text as the message key
    for internal_key, translation in messages_text.items():
        pofile.append(
            POEntry(
                msgid=clean_string(english_messages[internal_key]),
                msgstr=clean_string(translation),
            )
        )

    # The header is built after the entries have been appended so that
    # percent_translated() is computed over the complete catalog.
    # https://www.gnu.org/software/gettext/manual/html_node/Header-Entry.html  # noqa: E501
    pofile.metadata = {
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain; charset=utf-8",
        "Language": transifex_language,
        "Language-Django": language_code,
        "Language-Transifex": transifex_language,
        "Language-Team": "https://www.transifex.com/creativecommons/CC/",
        "MIME-Version": "1.0",
        "PO-Revision-Date": NOW,
        "Percent-Translated": pofile.percent_translated(),
        "Project-Id-Version": tool.resource_slug,
    }

    # exist_ok=True avoids the race between checking for the directory and
    # creating it. A bare filename has no directory part to create.
    directory = os.path.dirname(po_filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Save mofile ourself. We could call 'compilemessages' but it wants to
    # compile everything, which is both overkill and can fail if the venv
    # or project source is not writable. We know this dir is writable, so
    # just save this pofile and mofile ourselves.
    if po_filename.endswith(".po"):
        # Strip only the extension; ".po" may also occur inside the path.
        log_stem = po_filename[: -len(".po")]
    else:
        log_stem = po_filename
    LOG.debug("Writing %s.(mo|po)", log_stem)
    save_pofile_as_pofile_and_mofile(pofile, po_filename)
def handle(self, input_directory, **options):
    """
    Import legacy license HTML files and write translation catalogs.

    The command works in two passes. The first pass scans
    ``input_directory`` for CC0 and BY HTML files, filters them by the
    command line options and finds or creates the matching ``License``
    and ``LegalCode`` rows. The second pass parses each selected HTML
    file (English first) and writes a .po/.mo pair for every legal code
    except the 3.0 ones; ported 3.0 licenses only get their HTML saved.

    Args:
        input_directory: directory containing the HTML files.
        **options: must contain ``versions`` and ``languages`` (comma
            separated strings, or a falsy value for "no filter") and
            ``unwrapped`` (stored on ``self.unwrapped``).

    Raises:
        ValueError: if a file uses a language code that is not in
            ``settings.LANG_INFO``, or if no English legal code was
            selected for import.
        NotImplementedError: if a selected file is neither a BY license
            nor CC0, or no parser exists for its version.
    """
    if options["versions"]:
        versions_to_include = options["versions"].split(",")
    else:
        versions_to_include = None
    if options["languages"]:
        # English is always included: it supplies the message keys for
        # every other language.
        languages_to_include = {"en"} | set(options["languages"].split(","))
    else:
        languages_to_include = None

    self.unwrapped = options["unwrapped"]

    # These counters are only useful while debugging; nothing reports
    # them at the moment.
    licenses_created = 0
    legalcodes_created = 0
    legalcodes_to_import = []

    # Get list of html filenames for CC0 and any BY license (any version).
    # We'll filter out the filenames for unwanted versions later.
    html_filenames = sorted(
        name
        for name in os.listdir(input_directory)
        if name.startswith(("by", "zero_1.0")) and name.endswith(".html")
    )

    for filename in html_filenames:
        metadata = parse_legalcode_filename(filename)

        basename = os.path.splitext(filename)[0]
        fullpath = os.path.join(input_directory, filename)

        license_code = metadata["license_code"]
        version = metadata["version"]
        jurisdiction_code = metadata["jurisdiction_code"]
        cc_language_code = metadata[
            "cc_language_code"
        ] or get_default_language_for_jurisdiction(jurisdiction_code)

        # Make sure this is a valid language code (one we know about)
        django_language_code = cc_to_django_language_code(cc_language_code)
        if django_language_code not in settings.LANG_INFO:
            raise ValueError(f"Invalid language_code={cc_language_code}")

        # Just CC0, BY 3.0, & 4.0, and apply any command line options
        is_supported = (
            license_code in BY_LICENSE_CODES and version in {"3.0", "4.0"}
        ) or license_code in CC0_LICENSE_CODES
        version_wanted = (
            versions_to_include is None or version in versions_to_include
        )
        language_wanted = (
            languages_to_include is None
            or cc_language_code in languages_to_include
        )
        if not (is_supported and version_wanted and language_wanted):
            continue

        about_url = metadata["about_url"]

        # These are valid for BY only
        license_code_parts = license_code.split("-")
        if "by" in license_code_parts:
            allows_derivatives = "nd" not in license_code_parts
            license_flags = dict(
                permits_derivative_works=allows_derivatives,
                permits_reproduction=allows_derivatives,
                permits_distribution=allows_derivatives,
                permits_sharing=allows_derivatives,
                requires_share_alike="sa" in license_code_parts,
                requires_notice=True,
                requires_attribution=True,
                # GPL, LGPL only, I think
                requires_source_code=False,
                prohibits_commercial_use="nc" in license_code_parts,
                # Not any BY 4.0 license
                prohibits_high_income_nation_use=False,
            )
        elif license_code == "CC0":
            # permits anything, requires nothing, prohibits nothing
            license_flags = dict(
                permits_derivative_works=True,
                permits_reproduction=True,
                permits_distribution=True,
                permits_sharing=True,
                requires_share_alike=False,
                requires_notice=False,
                requires_attribution=False,
                requires_source_code=False,
                prohibits_commercial_use=False,
                prohibits_high_income_nation_use=False,
            )
        else:
            raise NotImplementedError(basename)

        # Find or create a License object
        license_obj, created = License.objects.get_or_create(
            about=about_url,
            defaults=dict(
                license_code=license_code,
                version=version,
                jurisdiction_code=jurisdiction_code,
                **license_flags,
            ),
        )
        if created:
            licenses_created += 1

        # Find or create a LegalCode object
        legalcode, created = LegalCode.objects.get_or_create(
            license=license_obj,
            language_code=cc_language_code,
            defaults=dict(
                html_file=fullpath,
            ),
        )
        if created:
            legalcodes_created += 1
        # Existing legal codes are (re)imported too, not just new ones.
        legalcodes_to_import.append(legalcode)

    # NOW parse the HTML and output message files
    legalcodes_to_import = LegalCode.objects.filter(
        pk__in=[lc.pk for lc in legalcodes_to_import]
    )

    # What are the language codes we have HTML files for?
    cc_language_codes = sorted(
        {lc.language_code for lc in legalcodes_to_import}
    )

    english_by_license_code_version = {}

    # We have to do English first. Django gets confused if you try to load
    # another language and it can't find English, I guess it's looking for
    # something to fall back to.
    if "en" not in cc_language_codes:
        # If english isn't in this list, something is wrong
        raise ValueError(
            "No English legal code was selected for import; English is"
            " required because it provides the message keys."
        )
    cc_language_codes.remove("en")

    for cc_language_code in ["en"] + cc_language_codes:
        for legalcode in legalcodes_to_import.filter(
            language_code=cc_language_code,
        ).order_by(
            "-license__version",
            "license__license_code",
            "license__jurisdiction_code",
        ):
            license_obj = legalcode.license
            license_code = license_obj.license_code
            version = license_obj.version

            with open(
                legalcode.html_file, "r", encoding="utf-8"
            ) as html_file:
                content = html_file.read()

            if version == "4.0":
                messages_text = self.import_by_40_license_html(
                    content=content,
                    legalcode=legalcode,
                )
            elif version == "3.0":
                if license_obj.jurisdiction_code:
                    # Ported license: we just save the HTML for now
                    legalcode.html = self.import_by_30_ported_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                    legalcode.save()
                    continue
                # Unported license: we parse out the messages like 4.0
                messages_text = self.import_by_30_unported_license_html(
                    content=content,
                    legalcode=legalcode,
                )
            elif license_code == "CC0":
                messages_text = self.import_cc0_license_html(
                    content=content,
                    legalcode=legalcode,
                )
            else:
                raise NotImplementedError(
                    f"Have not implemented parsing for {license_code} {version} licenses."
                )

            if version == "3.0":
                # 3.0 doesn't have any translation files - might be the
                # same for other versions
                continue

            key = f"{license_code}|{version}"
            if cc_language_code == "en":
                english_by_license_code_version[key] = messages_text
            english_messages = english_by_license_code_version[key]

            pofile = POFile()
            # The syntax used to wrap messages in a .po file is difficult
            # if you ever want to copy/paste the messages, so if
            # --unwrapped was passed, set a wrap width that will
            # essentially disable wrapping.
            if self.unwrapped:
                pofile.wrapwidth = 999999

            pofile.metadata = {
                "Project-Id-Version": f"{license_code}-{version}",
                "Language": cc_language_code,
                "MIME-Version": "1.0",
                "Content-Type": "text/plain; charset=utf-8",
                "Content-Transfer-Encoding": "8bit",
            }

            # Use the English message text as the message key
            for internal_key, translation in messages_text.items():
                if cc_language_code == "en":
                    message_key = translation.strip()
                    message_value = ""
                else:
                    # WORKAROUND - by-nc-nd 4.0 NL has an extra item under
                    # s3a.
                    # https://github.com/creativecommons/creativecommons.org/pull/1160
                    if (
                        internal_key == "s3a4_if_you_share_adapted_material"
                        and internal_key not in english_messages
                    ):
                        message_key = (
                            "If You Share Adapted Material You produce, the Adapter's "
                            "License You apply must not prevent recipients of the Adapted "
                            "Material from complying with this Public License."
                        )
                    else:
                        message_key = english_messages[internal_key]
                    message_value = translation

                pofile.append(
                    POEntry(
                        msgid=clean_string(message_key),
                        msgstr=clean_string(message_value),
                    )
                )

            po_filename = legalcode.translation_filename()
            # exist_ok=True avoids the race between checking for the
            # directory and creating it. A bare filename has no directory
            # part to create.
            po_directory = os.path.dirname(po_filename)
            if po_directory:
                os.makedirs(po_directory, exist_ok=True)

            # Save mofile ourself. We could call 'compilemessages' but it
            # wants to compile everything, which is both overkill and can
            # fail if the venv or project source is not writable. We know
            # this dir is writable, so just save this pofile and mofile
            # ourselves.
            save_pofile_as_pofile_and_mofile(pofile, po_filename)