Exemple #1
0
    def plurr_placeholders(self, str1, str2, **kwargs):
        """For plurr-formatted strings, check that the source and target
        strings use the same set of placeholders.

        Returns True when the target is empty, when the source contains no
        match for `plurr_placeholders_regex`, or when both strings yield the
        same set of cleaned placeholders.

        Raises checks.FilterFailure naming the offending placeholders when
        the target uses placeholders absent from the source, or omits
        placeholders that the source contains.
        """
        if str2 == u"" or not plurr_placeholders_regex.search(str1):
            return True

        # Build each set once; only membership matters, not multiplicity.
        placeholders_source = set(
            clean_plurr_placeholder(source)
            for source in plurr_placeholders_regex.findall(str1)
        )
        placeholders_target = set(
            clean_plurr_placeholder(target)
            for target in plurr_placeholders_regex.findall(str2)
        )
        if placeholders_source == placeholders_target:
            return True

        # Sets have no defined iteration order, so the names are sorted to
        # keep the failure message stable from one run to the next.
        unknown_in_target = placeholders_target - placeholders_source
        if unknown_in_target:
            raise checks.FilterFailure(
                u"Unknown placeholders in translation: %s" %
                u", ".join(sorted(unknown_in_target)))

        missing_in_translation = placeholders_source - placeholders_target
        if missing_in_translation:
            raise checks.FilterFailure(
                u"Placeholders missing in translation: %s" %
                u", ".join(sorted(missing_in_translation)))

        return True
Exemple #2
0
    def unbalanced_tag_braces(self, str1, str2):
        """Compare the net `<` / `>` nesting depth of both strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # Pieces at odd positions of the split are the "special" ones;
            # the even positions hold ordinary, translatable text.
            depth = 0
            for piece in unbalanced_tag_braces_regex.split(str)[1::2]:
                if depth < 0:
                    # Once the depth is negative it is never updated again.
                    break
                if piece == '<':
                    depth += 1
                elif piece == '>':
                    depth -= 1

            return depth

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Unbalanced tag braces")

        return True
Exemple #3
0
    def whitespace(self, str1, str2):
        """Compare the pieces matched by `whitespace_regex` in both strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            parts = [u"\001"]

            for position, piece in enumerate(whitespace_regex.split(str)):
                # An empty piece is always recorded; the original comment
                # notes this detects whitespace at the start or end.
                if piece == u'':
                    parts.append(piece)

                # Odd positions hold the special (non-translatable) text,
                # which is recorded as well.
                if position % 2:
                    parts.append(piece)

            return u"\001".join(parts)

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Incorrect whitespaces")

        return True
Exemple #4
0
def _generic_check(str1, str2, regex, message):
    """Compare how often each piece captured by `regex` occurs in both strings.

    Returns True when check_translation accepts the fingerprints; raises
    checks.FilterFailure(message) otherwise.
    """
    def get_fingerprint(str, is_source=False, translation=''):
        pieces = regex.split(str)

        # A source that the regex does not split at all is skipped.
        if is_source and len(pieces) == 1:
            raise SkipCheck()

        # Tally the special pieces, which sit at the odd positions.
        tally = {}
        for piece in pieces[1::2]:
            tally[piece] = tally.get(piece, 0) + 1

        # Sorted keys make the fingerprint independent of occurrence order.
        return ''.join(
            u"\001%s\001%s" % (key, tally[key]) for key in sorted(tally)
        )

    if not check_translation(get_fingerprint, str1, str2):
        raise checks.FilterFailure(message)

    return True
Exemple #5
0
    def unbalanced_curly_braces(self, str1, str2):
        """Compare the number and nesting depth of curly braces in both
        strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # Special pieces sit at the odd positions of the split.
            specials = unbalanced_curly_braces_regex.split(str)[1::2]

            depth = 0
            for piece in specials:
                if depth < 0:
                    # A negative depth is never updated again.
                    break
                if piece == '{':
                    depth += 1
                elif piece == '}':
                    depth -= 1

            result = u"%d\001%d" % (len(specials), depth)

            # An unbalanced source gets one extra symbol so that its
            # fingerprint differs and the problem is always reported.
            if is_source and depth != 0:
                result += u"\001"

            return result

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Unbalanced curly braces")

        return True
Exemple #6
0
    def changed_attributes(self, str1, str2):
        """Compare how often each piece captured by
        `changed_attributes_regex` occurs in both strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # hardcoded rule: web banner images are translated differently,
            # so such sources are skipped
            if is_source and img_banner_regex.match(str):
                raise SkipCheck()

            # Tally the special pieces (odd positions of the split).
            tally = {}
            for piece in changed_attributes_regex.split(str)[1::2]:
                tally[piece] = tally.get(piece, 0) + 1

            return ''.join(
                u"\001%s\001%s" % (key, tally[key]) for key in sorted(tally)
            )

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Changed attributes")

        return True
Exemple #7
0
    def doublequoting(self, str1, str2):
        """Compare double quotation marks `"` between a source without any
        and its translation.

        Sources that already contain a `"` are skipped via SkipCheck.
        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            pieces = str.split('"')
            if is_source and '"' in str:
                raise SkipCheck()

            # Only every second piece (odd position) is counted.
            return u"%d\001" % len(pieces[1::2])

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Double quotes mismatch")

        return True
Exemple #8
0
def _generic_check(str1, str2, regex, message):
    """Compare how often each piece captured by `regex` occurs in both strings.

    Returns True when check_translation accepts the fingerprints; raises
    checks.FilterFailure(message) otherwise.
    """
    def get_fingerprint(string, is_source=False, translation=""):
        pieces = regex.split(string)

        # A source that the regex does not split at all is skipped.
        if is_source and len(pieces) == 1:
            raise SkipCheck()

        # Even positions hold ordinary text; only the odd ones are tallied.
        tally = {}
        for piece in pieces[1::2]:
            tally[piece] = tally.get(piece, 0) + 1

        return "".join(
            u"\001%s\001%s" % (key, tally[key]) for key in sorted(tally)
        )

    if not check_translation(get_fingerprint, str1, str2):
        raise checks.FilterFailure(message)

    return True
Exemple #9
0
    def linebreaks_double(self, str1, str2, **kwargs):
        """Require both strings to split into the same number of parts on
        `linebreaks_double_regex`.

        Returns True on a match; raises checks.FilterFailure otherwise.
        """
        def count_parts(text):
            return len(linebreaks_double_regex.split(text))

        if count_parts(str1) == count_parts(str2):
            return True

        raise checks.FilterFailure("Double line breaks mismatch")
Exemple #10
0
    def plurr_format(self, str1, str2, **kwargs):
        """Validate the Plurr syntax of the target string.

        Returns True when the target is empty, the source does not match
        `plurr_format_regex`, the `plurr` library cannot be imported, or
        formatting the target raises no SyntaxError. A SyntaxError is
        re-raised as checks.FilterFailure carrying its text.

        Reads `kwargs["language_code"]` as the locale.
        """
        # Nothing to validate without a translation or a Plurr-style source.
        nothing_to_check = str2 == u"" or not plurr_format_regex.search(str1)
        if nothing_to_check:
            return True

        # The library is optional: when it is missing the check passes.
        try:
            from plurr import Plurr
        except ImportError:
            return True

        formatter = Plurr()

        try:
            options = {
                "locale": kwargs["language_code"],
                "strict": False,
                "callback": lambda x: "",
            }
            formatter.format(str2, {}, options)
        except SyntaxError as error:
            raise checks.FilterFailure(str(error))

        return True
Exemple #11
0
    def plurr_format(self, str1, str2, **kwargs):
        """For plurr-formatted strings, checks the syntax is correct.

        Returns True when the target is empty, the source does not match
        `plurr_format_regex`, the `plurr` library cannot be imported, or
        formatting the target raises no SyntaxError. A SyntaxError is
        re-raised as checks.FilterFailure carrying its text.

        Reads `kwargs['language_code']` as the locale.
        """
        # Ignore check for empty target strings or non Plurr-formatted
        # source strings
        if str2 == u'' or not plurr_format_regex.search(str1):
            return True

        # Ignore check if library is missing
        try:
            from plurr import Plurr
        except ImportError:
            return True

        plurr = Plurr()

        try:
            plurr.format(str2, {}, {
                'locale': kwargs['language_code'],
                'strict': False,
                'callback': lambda x: '',
            })
        except SyntaxError as e:
            # `e.message` only exists on Python 2 exceptions; str(e) yields
            # the message text on both Python 2 and Python 3.
            raise checks.FilterFailure(str(e))

        return True
Exemple #12
0
    def doublequoting(self, str1, str2):
        """Check that double quotation marks `"` are used consistently in
        the two strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # Only every second piece (odd position) of the split is counted.
            counted = str.split('"')[1::2]
            return u"%d\001" % len(counted)

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Double quotes mismatch")

        return True
Exemple #13
0
    def date_format(self, str1, str2):
        """Compare the sorted pieces produced by `date_format_regex_8` for
        both strings.

        Sources that are not date formats, or that hit one of the hardcoded
        exceptions, are skipped via SkipCheck. Returns True when
        check_translation accepts the fingerprints; raises
        checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            if is_source:
                # only sources matching the date pattern are checked
                if not date_format_regex_0.match(str):
                    raise SkipCheck()

                # filter out specific English strings which are not dates
                if date_format_regex_1.match(str):
                    raise SkipCheck()

                # filter out specific source/translation pairs
                if (date_format_regex_2.match(str) and
                        date_format_regex_3.match(translation)):
                    raise SkipCheck()

                if (date_format_regex_4.match(str) and
                        date_format_regex_5.match(translation)):
                    raise SkipCheck()

                if (date_format_regex_6.match(str) and
                        date_format_regex_7.match(translation)):
                    raise SkipCheck()

            # Sorting makes the fingerprint independent of piece order.
            pieces = sorted(date_format_regex_8.split(str))
            return u"\001".join(pieces)

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Incorrect date format")

        return True
Exemple #14
0
    def test_check(self, str1, str2):
        """Run check_translation with a fingerprint that is the same for
        every string.

        Returns True when check_translation accepts the pair; raises
        checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # Every string gets the same constant fingerprint.
            return 0

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Incorrect test check")

        return True
Exemple #15
0
    def unescaped_ampersands(self, str1, str2):
        """Reject a target matching `broken_ampersand_regex` when the source
        matches `escaped_entities_regex`.

        Returns True when the source has no such match, or when the target
        is not split by `broken_ampersand_regex`; raises
        checks.FilterFailure otherwise.
        """
        # The target is only inspected when the source has a match.
        if not escaped_entities_regex.search(str1):
            return True

        # A single piece means the regex found nothing to split on.
        if len(broken_ampersand_regex.split(str2)) == 1:
            return True

        raise checks.FilterFailure(u"Unescaped ampersand mismatch")
Exemple #16
0
    def accelerators(self, str1, str2, **kwargs):
        """Check that the accelerator markers `&`, `_` and `^` occur equally
        often in the source and the target string.

        Returns True for sources matching `plurr_format_regex` and when
        check_translation accepts the fingerprints; raises
        checks.FilterFailure otherwise.
        """
        def get_fingerprint(string, is_source=False, translation=""):

            # special rule for banner images in the web client which are
            # translated differently, e.g.:
            # From: <img src="/images/account/bnr_allow.gif"
            #            alt="Allow Account Access" />
            # To:   <h1>Allow Konto Zugriff</h1>
            if is_source and img_banner_regex.match(string):
                raise SkipCheck()

            # Wrap whatever accelerators_regex_0 captures in \001 markers
            # before splitting (the original describes this as temporarily
            # escaping HTML entities). The markers are never restored:
            # only the counts below leave this function.
            escaped = accelerators_regex_0.sub(r"\001\1\001", string)

            # Special pieces sit at the odd positions of the split.
            markers = accelerators_regex_1.split(escaped)[1::2]

            return u"%d\001%d\001%d" % (
                markers.count("&"),
                markers.count("_"),
                markers.count("^"),
            )

        # Ignore check for Plurr-formatted strings
        if plurr_format_regex.search(str1):
            return True

        if check_translation(get_fingerprint, str1, str2):
            return True

        raise checks.FilterFailure(u"Accelerator mismatch")
Exemple #17
0
    def incorrectly_escaped_ampersands(self, str1, str2, **kwargs):
        """Reject strings matching `broken_ampersand_regex` when the target
        matches `escaped_entities_regex`.

        Returns True when the target has no such match, or when neither
        string is split by `broken_ampersand_regex`; raises
        checks.FilterFailure otherwise.
        """
        if not escaped_entities_regex.search(str2):
            return True

        # A single piece means the regex found nothing to split on. The
        # target is only split when the source turned out clean.
        source_is_clean = len(broken_ampersand_regex.split(str1)) == 1
        if source_is_clean and len(broken_ampersand_regex.split(str2)) == 1:
            return True

        raise checks.FilterFailure(u"Escaped ampersand mismatch")
Exemple #18
0
    def linebreaks_multiple(self, str1, str2, **kwargs):
        """Require each `linebreaks_multiple_regex` match to contain the same
        number of newlines in both strings, match by match and in order.

        Returns True on agreement; raises checks.FilterFailure otherwise.
        """
        def newline_counts(text):
            counts = []
            for found in linebreaks_multiple_regex.finditer(text):
                counts.append(found.group().count("\n"))
            return counts

        if newline_counts(str1) == newline_counts(str2):
            return True

        raise checks.FilterFailure("Multiple line breaks mismatch")
Exemple #19
0
    def plurr_placeholders(self, str1, str2, **kwargs):
        """For plurr-formatted strings, checks placeholders used in target
        strings actually exist in the source string, and vice versa.

        Returns True when the target is empty, when the source contains no
        match for `plurr_placeholders_regex`, or when both strings yield the
        same set of cleaned placeholders.

        Raises checks.FilterFailure naming the offending placeholders
        otherwise.
        """
        if str2 == u'' or not plurr_placeholders_regex.search(str1):
            return True

        def extract_placeholders(string):
            # Flatten every findall() result and drop the empty entries.
            # Assumes each result is a tuple of groups, which is what the
            # original list() wrapping suggests -- confirm against the regex.
            # Returning a real set (not a one-shot map iterator) lets it be
            # compared and subtracted repeatedly, and needs no `reduce`.
            return set(
                clean_plurr_placeholder(group)
                for groups in plurr_placeholders_regex.findall(string)
                for group in groups
                if group
            )

        placeholders_source = extract_placeholders(str1)
        placeholders_target = extract_placeholders(str2)
        if placeholders_source == placeholders_target:
            return True

        # Sets have no defined iteration order, so the names are sorted to
        # keep the failure message stable from one run to the next.
        unknown_in_target = placeholders_target - placeholders_source
        if unknown_in_target:
            raise checks.FilterFailure(
                u'Unknown placeholders in translation: %s' %
                u', '.join(sorted(unknown_in_target))
            )

        missing_in_translation = placeholders_source - placeholders_target
        if missing_in_translation:
            raise checks.FilterFailure(
                u'Placeholders missing in translation: %s' %
                u', '.join(sorted(missing_in_translation))
            )

        return True
Exemple #20
0
    def mustache_placeholder_pairs(self, str1, str2):
        """Check that mustache section tags in the translation are properly
        opened, closed and nested.

        Sources without a match for
        `mustache_placeholder_pairs_open_tag_regex` are skipped via
        SkipCheck. Returns True when check_translation accepts the
        fingerprints; raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            if is_source:
                # Inspect the string that was passed in rather than the
                # enclosing str1, and do it before any splitting: the
                # source fingerprint is the constant 1.
                if not mustache_placeholder_pairs_open_tag_regex.search(str):
                    raise SkipCheck()

                return 1

            stack = []
            # Special pieces (the tags) sit at the odd positions.
            for chunk in mustache_placeholder_pairs_regex.split(str)[1::2]:
                tag = chunk[3:-2]  # extract 'tagname' from '{{#tagname}}'

                if chunk[2:3] in ('#', '^'):
                    # opening tag: the same tag must not already be open
                    if tag in stack:
                        return 0
                    stack.append(tag)
                elif not stack or stack[-1] != tag:
                    # closing tag '{{/tagname}}' that does not close the
                    # most recently opened tag
                    return 0
                else:
                    stack.pop()

            # Tags left open make the translation invalid as well.
            return 0 if stack else 1

        if check_translation(get_fingerprint, str1, str2):
            return True
        else:
            raise checks.FilterFailure(u"mustache_placeholder_pairs")
Exemple #21
0
    def double_quotes_in_tags(self, str1, str2):
        """Check that double quotation marks `"` inside tags are consistent
        between the two strings.

        Returns True for sources matching `img_banner_regex`, when the
        source has no quotes in tags and the target's quotes are paired, or
        when both fingerprints are equal. Raises checks.FilterFailure
        otherwise, including when the source's own quotes are unpaired.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            quotes_by_depth = {}
            depth = 0

            pieces = unbalanced_tag_braces_regex.split(str)
            for position, piece in enumerate(pieces):
                if position % 2 == 0:
                    # ordinary text: quotes only count while inside a tag
                    if depth > 0:
                        quotes_by_depth[depth] += piece.count('"')
                    continue

                # special text; a negative depth is never updated again
                if depth < 0:
                    continue

                if piece == '<':
                    depth += 1
                    quotes_by_depth.setdefault(depth, 0)
                elif piece == '>':
                    depth -= 1

            # Depths without quotes are left out of the fingerprint; every
            # remaining depth must hold an even number of quotes.
            fingerprint = ''
            all_paired = True
            for key in sorted(quotes_by_depth):
                total = quotes_by_depth[key]
                if total > 0:
                    fingerprint += u"\001%s\001%s" % (key, total)
                    all_paired = all_paired and total % 2 == 0

            return fingerprint, all_paired

        # hardcoded rule: skip web banner images which are translated
        # differently
        if img_banner_regex.match(str1):
            return True

        source_print, source_paired = get_fingerprint(str1, is_source=True)
        if source_paired:
            target_print, target_paired = get_fingerprint(
                str2, is_source=False)
            if source_print == '' and target_paired:
                return True
            if source_print == target_print:
                return True

        raise checks.FilterFailure(u"Double quotes in tags mismatch")
Exemple #22
0
    def date_format(self, str1, str2, **kwargs):
        """Require the translation of a date-format source to match
        `date_format_regex` too.

        Sources that do not match `date_format_regex`, or that match
        `date_format_exception_regex`, are skipped via SkipCheck. Returns
        True when check_translation accepts the fingerprints; raises
        checks.FilterFailure otherwise.
        """
        def get_fingerprint(string, is_source=False, translation=""):
            looks_like_date = date_format_regex.match(string) is not None

            if is_source:
                # Skip non-dates and the specific English strings which
                # are known not to be dates.
                if (not looks_like_date or
                        date_format_exception_regex.match(string)):
                    raise SkipCheck()

            return looks_like_date

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Incorrect date format")

        return True
Exemple #23
0
    def mustache_like_placeholder_pairs(self, str1, str2):
        """Check that every closing `{{/tag}}` in the translation has a
        preceding, still unclosed opening tag of the same name.

        Sources matching `mustache_placeholder_pairs_open_tag_regex` are
        skipped via SkipCheck. Returns True when check_translation accepts
        the fingerprints; raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            if is_source:
                # Inspect the string that was passed in rather than the
                # enclosing str1; the source fingerprint is the constant 1.
                if mustache_placeholder_pairs_open_tag_regex.search(str):
                    raise SkipCheck()

                return 1

            # Number of currently open occurrences per tag name.
            open_counts = {}

            # Special pieces (the tags) sit at the odd positions.
            chunks = mustache_like_placeholder_pairs_regex.split(str)
            for chunk in chunks[1::2]:
                if chunk[2:3] != '/':
                    # opening tag: key the count by the tag name. Testing
                    # the whole chunk instead never finds the key, which
                    # would reset the count to 1 on every repeat.
                    tag = chunk[2:-2]
                    open_counts[tag] = open_counts.get(tag, 0) + 1
                else:
                    # closing tag
                    # extract 'tagname' from '{{/tagname}}'
                    tag = chunk[3:-2]
                    if open_counts.get(tag, 0) == 0:
                        # nothing left to close: never equals the source's 1
                        return None

                    open_counts[tag] -= 1

            return 1

        if check_translation(get_fingerprint, str1, str2):
            return True
        else:
            raise checks.FilterFailure(u"mustache_like_placeholder_pairs")
Exemple #24
0
    def tags_differ(self, str1, str2):
        """Compare how often each tag occurs in both strings.

        Sources matching `img_banner_regex` or `no_tags_regex` are skipped
        via SkipCheck. Returns True when check_translation accepts the
        fingerprints; raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):

            if is_source:
                # hardcoded rule: skip web banner images which are translated
                # differently
                if img_banner_regex.match(str):
                    raise SkipCheck()

                # hardcoded rule: skip strings that look like tags but are
                # not tags
                if no_tags_regex.match(str):
                    raise SkipCheck()

            # Tally group 1 of tags_differ_regex_1 for every special piece
            # (odd positions of the split) that it matches.
            tally = {}
            for piece in tags_differ_regex_0.split(str)[1::2]:
                found = tags_differ_regex_1.match(piece)
                if found is None:
                    continue

                name = found.group(1)
                tally[name] = tally.get(name, 0) + 1

            return ''.join(
                u"\001%s\001%s" % (name, tally[name])
                for name in sorted(tally)
            )

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Tags differ")

        return True
Exemple #25
0
    def c_format(self, str1, str2):
        """Compare the pieces captured by `c_format_regex`, in order of
        appearance, between both strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # Special pieces sit at the odd positions of the split; their
            # order is kept, so reordering changes the fingerprint.
            specials = c_format_regex.split(str)[1::2]
            return ''.join(u"\001%s" % piece for piece in specials)

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Incorrect C format")

        return True
Exemple #26
0
    def unescaped_ampersands(self, str1, str2):
        """Compare whether bare `&` and other pieces captured by
        `unescaped_ampersands_regex` are present in both strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # No ampersand in the translation: the source fingerprint is
            # None. The original comment describes this as skipping the
            # comparison; how None is treated is up to check_translation.
            if is_source and u"&" not in translation:
                return None

            # Special pieces sit at the odd positions of the split.
            specials = unescaped_ampersands_regex.split(str)[1::2]
            has_unescaped = any(piece == '&' for piece in specials)
            has_escaped = any(piece != '&' for piece in specials)

            # The fingerprint records only presence, not the number of
            # occurrences: 2 for a bare '&', plus 1 for any other piece.
            return (2 if has_unescaped else 0) + (1 if has_escaped else 0)

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Unescaped ampersand mismatch")

        return True
Exemple #27
0
    def non_printable(self, str1, str2):
        """Compare the characters captured by `non_printable_regex`, in
        order of appearance, between both strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            codes = []

            # Special pieces sit at the odd positions of the split; each
            # is rendered as its code point in hexadecimal.
            for piece in non_printable_regex.split(str)[1::2]:
                rendered = '{0x%02x}' % ord(piece)
                codes.append(u"\001%s" % rendered)

            return ''.join(codes)

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Non printable mismatch")

        return True
Exemple #28
0
    def unbalanced_curly_braces(self, str1, str2, **kwargs):
        """Compare the number and nesting depth of curly braces in both
        strings.

        Returns True for sources matching `plurr_format_regex` and when
        check_translation accepts the fingerprints; raises
        checks.FilterFailure otherwise.
        """
        def get_fingerprint(string, is_source=False, translation=""):
            # Special pieces sit at the odd positions of the split.
            specials = unbalanced_curly_braces_regex.split(string)[1::2]

            depth = 0
            for piece in specials:
                if depth < 0:
                    # A negative depth is never updated again.
                    break
                if piece == "{":
                    depth += 1
                elif piece == "}":
                    depth -= 1

            result = u"%d\001%d" % (len(specials), depth)

            # An unbalanced source gets one extra symbol so that its
            # fingerprint differs and the problem is always reported.
            if is_source and depth != 0:
                result += u"\001"

            return result

        # Plurr-formatted strings are exempt from this check.
        if plurr_format_regex.search(str1):
            return True

        if not check_translation(get_fingerprint, str1, str2):
            raise checks.FilterFailure(u"Unbalanced curly braces")

        return True
Exemple #29
0
    def potential_unwanted_placeholders(self, str1, str2):
        """Reject translations with more `potential_placeholders_regex`
        matches than their source.

        Returns True when the source has at least as many as the target;
        raises checks.FilterFailure otherwise.
        """
        def get_fingerprint(str, is_source=False, translation=''):
            # Placeholder signs sit at the odd positions of the split; the
            # fingerprint is simply how many there are.
            return len(potential_placeholders_regex.split(str)[1::2])

        source_total = get_fingerprint(str1, True, str2)
        target_total = get_fingerprint(str2, False, str1)

        if source_total < target_total:
            raise checks.FilterFailure(u"Potential unwanted placeholders")

        return True
Exemple #30
0
    def broken_entities(self, str1, str2):
        """Count malformed HTML/XML entities and compare the counts of both
        strings.

        Returns True when check_translation accepts the fingerprints;
        raises checks.FilterFailure otherwise.
        """
        # Compiled once per call instead of once per matching chunk. The
        # backslash is escaped explicitly: a bare "\D" in a non-raw literal
        # is an invalid escape sequence that newer Pythons warn about.
        non_digit_regex = re.compile(u"\\D", re.U)

        def get_fingerprint(str, is_source=False, translation=''):
            fingerprint = 1

            # Special pieces sit at the odd positions of the split.
            for chunk in broken_entities_regex_0.split(str)[1::2]:
                # check if ';' is present at the end for some known named
                # entities that should never match as false positives in
                # the normal text
                if broken_entities_regex_1.match(chunk):
                    fingerprint += 1

                # check if ';' is present at the end for numeric and
                # hexadecimal entities
                if broken_entities_regex_2.match(chunk):
                    fingerprint += 1

                # check if a prefix '#' symbol is missing for a numeric
                # entity
                if broken_entities_regex_3.match(chunk):
                    fingerprint += 1

                # check if a prefix '#' symbol is missing for a hexadecimal
                # entity
                if broken_entities_regex_4.match(chunk):
                    fingerprint += 1

                # check if a prefix 'x' symbol is missing (or replaced with
                # something else) for a hexadecimal entity
                mo = broken_entities_regex_5.match(chunk)
                if mo:
                    if non_digit_regex.match(mo.group(1)) or \
                        non_digit_regex.match(mo.group(2)):
                        fingerprint += 1

                # the checks below are conservative, i.e. they do not include
                # the full valid Unicode range but just test for common
                # mistakes in real-life XML/HTML entities

                # check if a numbered entity is within acceptable range
                mo = broken_entities_regex_6.match(chunk)
                if mo:
                    number = int(mo.group(1))
                    if number > 65535:
                        fingerprint += 1

                # check if a hexadecimal numbered entity length is within
                # acceptable range
                mo = broken_entities_regex_7.match(chunk)
                if mo:
                    v = int(mo.group(1), 16)
                    if v > 65535:
                        fingerprint += 1

            # A source with problems gets a string fingerprint, which can
            # never equal the integer computed for the translation.
            if is_source and fingerprint > 1:
                fingerprint = u"%d\001" % fingerprint

            return fingerprint

        if check_translation(get_fingerprint, str1, str2):
            return True
        else:
            raise checks.FilterFailure(u"Broken HTML entities")