def write_po_files(
        self,
        legal_code,
        language_code,
        english_by_unit_version,
        messages_text,
    ):
        """
        Build the gettext catalog for one legal code and save it to disk.

        Each entry uses the English text as ``msgid`` and the translated
        text as ``msgstr``. The catalog is written next to the path returned
        by ``legal_code.translation_filename()`` via
        ``save_pofile_as_pofile_and_mofile``.

        Args:
            legal_code: Object exposing ``tool`` (with ``unit``, ``version``
                and ``resource_slug``) and ``translation_filename()``.
            language_code: Django language code; it is also mapped to its
                Transifex equivalent for the catalog header.
            english_by_unit_version: Mapping from ``"{unit}|{version}"`` to a
                mapping of internal message key -> English text.
            messages_text: Mapping of internal message key -> translated
                text for ``language_code``.

        Raises:
            KeyError: If ``english_by_unit_version`` has no entry for this
                unit/version, or the English messages lack one of the
                internal keys present in ``messages_text``.
        """
        tool = legal_code.tool
        po_filename = legal_code.translation_filename()
        transifex_language = map_django_to_transifex_language_code(
            language_code)

        english_messages = english_by_unit_version[
            f"{tool.unit}|{tool.version}"]

        pofile = POFile()
        # The syntax used to wrap messages in a .po file is
        # difficult if you ever want to copy/paste the messages, so
        # if --unwrapped was passed, set a wrap width that will
        # essentially disable wrapping.
        if self.unwrapped:
            pofile.wrapwidth = 999999

        # Use the English message text as the message key
        for internal_key, translation in messages_text.items():
            pofile.append(
                POEntry(
                    msgid=clean_string(english_messages[internal_key]),
                    msgstr=clean_string(translation),
                ))

        # The header is built after all entries were appended so that
        # "Percent-Translated" reflects the final catalog content.
        # https://www.gnu.org/software/gettext/manual/html_node/Header-Entry.html  # noqa: E501
        pofile.metadata = {
            "Content-Transfer-Encoding": "8bit",
            "Content-Type": "text/plain; charset=utf-8",
            "Language": transifex_language,
            "Language-Django": language_code,
            "Language-Transifex": transifex_language,
            "Language-Team": "https://www.transifex.com/creativecommons/CC/",
            "MIME-Version": "1.0",
            "PO-Revision-Date": NOW,
            "Percent-Translated": pofile.percent_translated(),
            "Project-Id-Version": tool.resource_slug,
        }

        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        # A bare filename has no directory component and nothing to create.
        directory = os.path.dirname(po_filename)
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Save mofile ourself. We could call 'compilemessages' but
        # it wants to compile everything, which is both overkill
        # and can fail if the venv or project source is not
        # writable. We know this dir is writable, so just save this
        # pofile and mofile ourselves.
        #
        # splitext() removes only the trailing extension; str.replace()
        # would also strip any ".po" that appears earlier in the path.
        LOG.debug("Writing %s.(mo|po)", os.path.splitext(po_filename)[0])
        save_pofile_as_pofile_and_mofile(pofile, po_filename)
예제 #2
0
    def handle(self, input_directory, **options):
        """
        Import legalcode HTML files and write gettext catalogs for them.

        The command runs in two phases:

        1. Every ``by*.html`` / ``zero_1.0*.html`` file in
           ``input_directory`` is matched to a ``License`` and ``LegalCode``
           row (created when missing). Only CC0 and BY 3.0 / 4.0 files that
           pass the ``versions`` / ``languages`` options are kept.
        2. The HTML of each kept legalcode is parsed. Ported 3.0 licenses
           only get their HTML stored on the legalcode; 4.0 and CC0
           legalcodes get a .po/.mo pair whose message ids are the English
           texts. English is processed first so that its messages are
           available as keys for every other language.

        Args:
            input_directory: Directory containing the legalcode HTML files.
            **options: Command options. Reads ``versions`` and ``languages``
                (comma-separated strings or a falsy value for "no filter")
                and ``unwrapped`` (stored on ``self.unwrapped``).

        Raises:
            ValueError: If a file's language code is not in
                ``settings.LANG_INFO``, or if no English legalcode was
                selected for import.
            NotImplementedError: If a selected file is neither a BY license
                nor CC0, or no parser exists for its version.
        """
        if options["versions"]:
            versions_to_include = options["versions"].split(",")
        else:
            versions_to_include = None
        if options["languages"]:
            # English is always needed: its texts are the message ids.
            languages_to_include = {"en", *options["languages"].split(",")}
        else:
            languages_to_include = None
        self.unwrapped = options["unwrapped"]

        legalcodes_to_import = []

        # Get list of html filenames for CC0 and any BY license (any version).
        # We'll filter out the filenames for unwanted versions later.
        html_filenames = sorted(
            f
            for f in os.listdir(input_directory)
            if f.startswith(("by", "zero_1.0")) and f.endswith(".html")
        )
        for filename in html_filenames:
            metadata = parse_legalcode_filename(filename)

            basename = os.path.splitext(filename)[0]
            fullpath = os.path.join(input_directory, filename)

            license_code = metadata["license_code"]
            version = metadata["version"]
            jurisdiction_code = metadata["jurisdiction_code"]
            cc_language_code = metadata[
                "cc_language_code"
            ] or get_default_language_for_jurisdiction(jurisdiction_code)
            # Make sure this is a valid language code (one we know about)
            django_language_code = cc_to_django_language_code(cc_language_code)
            if django_language_code not in settings.LANG_INFO:
                raise ValueError(f"Invalid language_code={cc_language_code}")

            # Just CC0, BY 3.0, & 4.0, and apply any command line options
            include = (
                (
                    (license_code in BY_LICENSE_CODES and version in {"3.0", "4.0"})
                    or license_code in CC0_LICENSE_CODES
                )
                and (versions_to_include is None or version in versions_to_include)
                and (
                    languages_to_include is None
                    or cc_language_code in languages_to_include
                )
            )
            if not include:
                continue

            about_url = metadata["about_url"]

            # These are valid for BY only
            license_code_parts = license_code.split("-")
            if "by" in license_code_parts:
                # NOTE(review): all four permits_* flags are keyed on "nd",
                # exactly as before. Confirm that reproduction, distribution
                # and sharing are really meant to be False for ND licenses.
                allows_derivatives = "nd" not in license_code_parts
                license_flags = dict(
                    permits_derivative_works=allows_derivatives,
                    permits_reproduction=allows_derivatives,
                    permits_distribution=allows_derivatives,
                    permits_sharing=allows_derivatives,
                    requires_share_alike="sa" in license_code_parts,
                    requires_notice=True,
                    requires_attribution=True,
                    requires_source_code=False,  # GPL, LGPL only, I think
                    prohibits_commercial_use="nc" in license_code_parts,
                    # Not any BY 4.0 license
                    prohibits_high_income_nation_use=False,
                )
            elif license_code == "CC0":
                # permits anything, requires nothing, prohibits nothing
                license_flags = dict(
                    permits_derivative_works=True,
                    permits_reproduction=True,
                    permits_distribution=True,
                    permits_sharing=True,
                    requires_share_alike=False,
                    requires_notice=False,
                    requires_attribution=False,
                    requires_source_code=False,
                    prohibits_commercial_use=False,
                    prohibits_high_income_nation_use=False,
                )
            else:
                raise NotImplementedError(basename)

            # Find or create a License object
            license_obj, _ = License.objects.get_or_create(
                about=about_url,
                defaults=dict(
                    license_code=license_code,
                    version=version,
                    jurisdiction_code=jurisdiction_code,
                    **license_flags,
                ),
            )
            # Find or create a LegalCode object
            legalcode, _ = LegalCode.objects.get_or_create(
                license=license_obj,
                language_code=cc_language_code,
                defaults=dict(
                    html_file=fullpath,
                ),
            )
            legalcodes_to_import.append(legalcode)

        # NOW parse the HTML and output message files
        legalcodes_to_import = LegalCode.objects.filter(
            pk__in=[lc.pk for lc in legalcodes_to_import]
        )

        # What are the language codes we have HTML files for?
        cc_language_codes = sorted({lc.language_code for lc in legalcodes_to_import})

        english_by_license_code_version = {}

        # We have to do English first. Django gets confused if you try to load
        # another language and it can't find English, I guess it's looking for
        # something to fall back to.
        # If english isn't in this list, something is wrong: fail with a clear
        # message instead of list.remove()'s generic "x not in list".
        if "en" not in cc_language_codes:
            raise ValueError(
                "No English ('en') legalcode was selected for import; "
                "English must be imported before any other language."
            )
        cc_language_codes.remove("en")
        for cc_language_code in ["en"] + cc_language_codes:
            for legalcode in legalcodes_to_import.filter(
                language_code=cc_language_code,
            ).order_by(
                "-license__version",
                "license__license_code",
                "license__jurisdiction_code",
            ):
                license_obj = legalcode.license
                license_code = license_obj.license_code
                version = license_obj.version
                with open(legalcode.html_file, "r", encoding="utf-8") as f:
                    content = f.read()

                if version == "4.0":
                    messages_text = self.import_by_40_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                elif version == "3.0":
                    if license_obj.jurisdiction_code:
                        # Ported license: we just save the HTML for now
                        legalcode.html = self.import_by_30_ported_license_html(
                            content=content,
                            legalcode=legalcode,
                        )
                        legalcode.save()
                        continue
                    # Unported license: we parse out the messages like 4.0
                    messages_text = self.import_by_30_unported_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                elif license_code == "CC0":
                    messages_text = self.import_cc0_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                else:
                    raise NotImplementedError(
                        f"Have not implemented parsing for {license_code} {version} licenses."
                    )

                if version == "3.0":
                    # 3.0 doesn't have any translation files - might be the
                    # same for other versions
                    continue

                key = f"{license_code}|{version}"
                if cc_language_code == "en":
                    english_by_license_code_version[key] = messages_text
                english_messages = english_by_license_code_version[key]

                pofile = POFile()
                # The syntax used to wrap messages in a .po file is difficult
                # if you ever want to copy/paste the messages, so if
                # --unwrapped was passed, set a wrap width that will
                # essentially disable wrapping.
                if self.unwrapped:
                    pofile.wrapwidth = 999999
                pofile.metadata = {
                    "Project-Id-Version": f"{license_code}-{version}",
                    "Language": cc_language_code,
                    "MIME-Version": "1.0",
                    "Content-Type": "text/plain; charset=utf-8",
                    "Content-Transfer-Encoding": "8bit",
                }

                # Use the English message text as the message key
                for internal_key, translation in messages_text.items():
                    if cc_language_code == "en":
                        message_key = translation.strip()
                        message_value = ""
                    elif (
                        internal_key == "s3a4_if_you_share_adapted_material"
                        and internal_key not in english_messages
                    ):
                        # WORKAROUND - by-nc-nd 4.0 NL has an extra item under s3a.
                        # https://github.com/creativecommons/creativecommons.org/pull/1160
                        message_key = (
                            "If You Share Adapted Material You produce, the Adapter's "
                            "License You apply must not prevent recipients of the Adapted "
                            "Material from complying with this Public License."
                        )
                        message_value = translation
                    else:
                        message_key = english_messages[internal_key]
                        message_value = translation

                    pofile.append(
                        POEntry(
                            msgid=clean_string(message_key),
                            msgstr=clean_string(message_value),
                        )
                    )

                po_filename = legalcode.translation_filename()
                # exist_ok avoids the isdir()/makedirs() race; a bare filename
                # has no directory component and nothing to create.
                po_directory = os.path.dirname(po_filename)
                if po_directory:
                    os.makedirs(po_directory, exist_ok=True)
                # Save mofile ourself. We could call 'compilemessages' but it
                # wants to compile everything, which is both overkill and can
                # fail if the venv or project source is not writable. We know
                # this dir is writable, so just save this pofile and mofile
                # ourselves.
                save_pofile_as_pofile_and_mofile(pofile, po_filename)