예제 #1
0
 def check_string(self, ctx, message, s):
     """Parse ``s`` as a format string and tag every problem found.

     A parse failure is reported as ``python-format-string-error``.  On
     success, each entry of the parser's ``warnings`` becomes its own tag,
     and for templates (``ctx.is_template``) the unnamed arguments are
     checked too.

     Returns the ``backend.FormatString`` object, or ``None`` when parsing
     failed.
     """
     label = message_repr(message, template='{}:')
     parsed = None
     try:
         parsed = backend.FormatString(s)
     except backend.ArgumentTypeMismatch as exc:
         [_, key, arg_types] = exc.args  # pylint: disable=unbalanced-tuple-unpacking
         self.tag(
             'python-format-string-error',
             label,
             tags.safestr(exc.message),
             tags.safestr(key),
             tags.safestr(', '.join(sorted(arg_types))),
         )
     except backend.Error as exc:
         self.tag('python-format-string-error', label,
                  tags.safestr(exc.message), *exc.args[:1])
     if parsed is None:
         return None
     for warning in parsed.warnings:
         # Warnings are exception instances; re-raising one lets the
         # ``except`` clauses below dispatch on its class.
         try:
             raise warning
         except backend.RedundantFlag as exc:
             if len(exc.args) == 2:
                 [spec, flag] = exc.args
                 details = [flag]
             else:
                 [spec, first, second] = exc.args
                 if first == second:
                     details = ['duplicate', first]
                 else:
                     details = [
                         first,
                         tags.safe_format('overridden by {}', second),
                     ]
             details += ['in', spec]
             self.tag('python-format-string-redundant-flag', label,
                      *details)
         except backend.RedundantPrecision as exc:
             [spec, precision] = exc.args
             self.tag('python-format-string-redundant-precision', label,
                      precision, 'in', spec)
         except backend.RedundantLength as exc:
             [spec, length] = exc.args
             self.tag('python-format-string-redundant-length', label,
                      length, 'in', spec)
         except backend.ObsoleteConversion as exc:
             [spec, old, new] = exc.args
             details = [old, '=>', new]
             if spec != old:
                 details += ['in', spec]
             self.tag('python-format-string-obsolete-conversion', label,
                      *details)
     if ctx.is_template:
         n_unnamed = len(parsed.seq_conversions)
         if n_unnamed > 1:
             self.tag('python-format-string-multiple-unnamed-arguments',
                      message_repr(message))
         elif n_unnamed == 1:
             # A lone unnamed integer argument in a message that has a
             # plural form gets a dedicated tag.
             if (message.msgid_plural is not None
                     and parsed.seq_conversions[0].type == 'int'):
                 self.tag('python-format-string-unnamed-plural-argument',
                          message_repr(message))
     return parsed
예제 #2
0
 def _check_message_xml_format(self, ctx, message, flags):
     if ctx.encoding is None:
         return
     prefix = message_repr(message, template='{}:')
     try:
         xml.check_fragment(message.msgid)
     except xml.SyntaxError as exc:
         if ctx.is_template:
             self.tag('malformed-xml', prefix, tags.safestr(exc))
         return
     if flags.fuzzy:
         return
     if not message.msgstr:
         return
     try:
         xml.check_fragment(message.msgstr)
     except xml.SyntaxError as exc:
         self.tag('malformed-xml', prefix, tags.safestr(exc))
예제 #3
0
 def check_dates(self, ctx):
     """Validate the POT-Creation-Date and PO-Revision-Date header fields.

     For each field this tags a missing or duplicated field, then checks
     every distinct value for boilerplate, syntax errors, non-canonical
     formatting, dates in the future and dates before ``gettext.epoch``.
     """
     try:
         content_type = ctx.metadata['Content-Type'][0]
     except IndexError:
         content_type = ''
     from_publican = content_type.startswith('application/x-publican;')
     for field in ('POT-Creation-Date', 'PO-Revision-Date'):
         values = ctx.metadata[field]
         if not values:
             self.tag('no-date-header-field', field)
             continue
         if len(values) > 1:
             self.tag('duplicate-header-field-date', field)
             values = sorted(set(values))
         for date in values:
             # The boilerplate revision date is accepted in templates.
             if ctx.is_template and field.startswith('PO-') and (
                     date == gettext.boilerplate_date):
                 continue
             # Publican uses DateTime->now(), which uses the UTC timezone by default:
             # https://sources.debian.net/src/publican/2.8-3/lib/Publican/Translate.pm/?hl=748#L744
             # https://bugs.debian.org/714739
             tz_hint = '-0000' if ('T' in date and from_publican) else None
             try:
                 normalized = gettext.fix_date_format(date, tz_hint=tz_hint)
             except gettext.BoilerplateDate:
                 self.tag('boilerplate-in-date', tags.safestr(field + ':'),
                          date)
                 continue
             except gettext.DateSyntaxError:
                 self.tag('invalid-date', tags.safestr(field + ':'), date)
                 continue
             if date != normalized:
                 self.tag('invalid-date', tags.safestr(field + ':'),
                          date, '=>', normalized)
             stamp = gettext.parse_date(normalized)
             if stamp > misc.utc_now():
                 self.tag('date-from-future', tags.safestr(field + ':'),
                          date)
             if stamp < gettext.epoch:
                 self.tag('ancient-date', tags.safestr(field + ':'), date)
예제 #4
0
 def check_string(self, ctx, message, s):
     """Parse ``s`` as a C format string and tag every problem found.

     A parse failure is reported as ``c-format-string-error`` with details
     that depend on the exception class.  On success, each entry of the
     parser's ``warnings`` becomes its own tag.

     Returns the ``backend.FormatString`` object, or ``None`` when parsing
     failed.
     """
     label = message_repr(message, template='{}:')
     parsed = None
     try:
         parsed = backend.FormatString(s)
     except backend.MissingArgument as exc:
         self.tag(
             'c-format-string-error',
             label,
             tags.safestr(exc.message),
             tags.safestr('{1}$'.format(*exc.args)),
         )
     except backend.ArgumentTypeMismatch as exc:
         self.tag(
             'c-format-string-error',
             label,
             tags.safestr(exc.message),
             tags.safestr('{1}$'.format(*exc.args)),
             tags.safestr(', '.join(sorted(exc.args[2]))),
         )
     except backend.FlagError as exc:
         [conversion, flag] = exc.args  # pylint: disable=unbalanced-tuple-unpacking
         self.tag('c-format-string-error', label,
                  tags.safestr(exc.message), flag, tags.safestr('in'),
                  conversion)
     except backend.Error as exc:
         self.tag('c-format-string-error', label,
                  tags.safestr(exc.message), *exc.args[:1])
     if parsed is None:
         return None
     for warning in parsed.warnings:
         # Warnings are exception instances; re-raising one lets the
         # ``except`` clauses below dispatch on its class.
         try:
             raise warning
         except backend.RedundantFlag as exc:
             if len(exc.args) == 2:
                 [spec, flag] = exc.args
                 details = [flag]
             else:
                 [spec, first, second] = exc.args
                 if first == second:
                     details = ['duplicate', first]
                 else:
                     details = [
                         first,
                         tags.safe_format('overridden by {}', second),
                     ]
             details += ['in', spec]
             self.tag('c-format-string-redundant-flag', label, *details)
         except backend.NonPortableConversion as exc:
             [spec, found, portable] = exc.args
             details = [found, '=>', portable]
             if spec != found:
                 details += ['in', spec]
             self.tag('c-format-string-non-portable-conversion', label,
                      *details)
     return parsed
예제 #5
0
 def check_args(self,
                message,
                src_loc,
                src_fmt,
                dst_loc,
                dst_fmt,
                *,
                omitted_int_conv_ok=False):
     """Compare the arguments of two parsed format strings.

     Tags a destination string that consumes more or fewer arguments than
     the source, and every position where the argument types differ.

     ``src_loc`` / ``dst_loc`` are labels shown in parentheses in the tags.
     With ``omitted_int_conv_ok`` set, missing arguments are tolerated when
     ``src_fmt.get_last_integer_conversion(n=<number omitted>)`` returns a
     true value.
     """
     label = message_repr(message, template='{}:')
     parenthesize = '({})'.format
     src_args = src_fmt.arguments
     dst_args = dst_fmt.arguments
     n_src = len(src_args)
     n_dst = len(dst_args)
     if n_dst > n_src:
         self.tag(
             'c-format-string-excess-arguments',
             label,
             n_dst,
             tags.safestr(parenthesize(dst_loc)),
             '>',
             n_src,
             tags.safestr(parenthesize(src_loc)),
         )
     elif n_dst < n_src:
         tolerated = omitted_int_conv_ok and (
             src_fmt.get_last_integer_conversion(n=n_src - n_dst))
         if not tolerated:
             self.tag(
                 'c-format-string-missing-arguments',
                 label,
                 n_dst,
                 tags.safestr(parenthesize(dst_loc)),
                 '<',
                 n_src,
                 tags.safestr(parenthesize(src_loc)),
             )
     # Only the positions present in both strings are compared; only the
     # first element stored for each argument is looked at.
     for src_slot, dst_slot in zip(src_args, dst_args):
         src_first = src_slot[0]
         dst_first = dst_slot[0]
         if src_first.type != dst_first.type:
             self.tag(
                 'c-format-string-argument-type-mismatch',
                 label,
                 tags.safestr(dst_first.type),
                 tags.safestr(parenthesize(dst_loc)),
                 '!=',
                 tags.safestr(src_first.type),
                 tags.safestr(parenthesize(src_loc)),
             )
예제 #6
0
 def check_messages(self, ctx):
     """Run the per-message checks over every live, non-header message.

     Besides delegating to the flag and format checks, this tags duplicate
     definitions, translations inside templates, stray previous-msgid
     data, inconsistent leading/trailing newlines, unusual characters that
     appear only in translations, conflict markers and partially
     translated plural messages.  A file without any such message is
     tagged ``empty-file``, unless it is a ``polib.MOFile`` whose
     ``possible_hidden_strings`` is true.
     """
     reported_unusual_chars = set()
     definitions_seen = collections.Counter()
     for message in ctx.file:
         if message.obsolete or is_header_entry(message):
             continue
         flags = self._check_message_flags(message)
         self._check_message_formats(ctx, message, flags)
         identity = (message.msgid, message.msgctxt)
         definitions_seen[identity] += 1
         # Report a duplicated definition once, on its second occurrence.
         if definitions_seen[identity] == 2:
             self.tag('duplicate-message-definition', message_repr(message))
         has_msgstr = bool(message.msgstr)
         has_msgstr_plural = any(message.msgstr_plural.values())
         if ctx.is_template and (has_msgstr or has_msgstr_plural):
             self.tag('translation-in-template', message_repr(message))
         starts_with_lf = message.msgid.startswith('\n')
         ends_with_lf = message.msgid.endswith('\n')
         previous_fields = (
             message.previous_msgctxt,
             message.previous_msgid,
             message.previous_msgid_plural,
         )
         has_previous = any(
             field is not None for field in previous_fields)
         if has_previous and not flags.fuzzy:
             self.tag('stray-previous-msgid', message_repr(message))
         # Strings whose leading/trailing newline must agree with msgid.
         candidates = []
         if message.msgid_plural is not None:
             candidates.append(message.msgid_plural)
         if not flags.fuzzy:
             if has_msgstr:
                 candidates.append(message.msgstr)
             if has_msgstr_plural:
                 # the order doesn't matter here
                 candidates.extend(message.msgstr_plural.values())
         if any(text.startswith('\n') != starts_with_lf
                for text in candidates):
             self.tag('inconsistent-leading-newlines',
                      message_repr(message))
         if any(text.endswith('\n') != ends_with_lf
                for text in candidates):
             self.tag('inconsistent-trailing-newlines',
                      message_repr(message))
         translations = []
         if has_msgstr:
             translations.append(message.msgstr)
         if has_msgstr_plural:
             translations.extend(misc.sorted_vk(message.msgstr_plural))
         if ctx.encoding is not None:
             source_chars = set(find_unusual_characters(message.msgid))
             source_chars |= set(
                 find_unusual_characters(message.msgid_plural or ''))
             for msgstr in translations:
                 # Skip characters that also occur in the source strings
                 # or were already reported for an earlier message.
                 new_chars = (set(find_unusual_characters(msgstr))
                              - source_chars - reported_unusual_chars)
                 if new_chars:
                     names = ', '.join(
                         'U+{:04X} {}'.format(
                             ord(ch), encinfo.get_character_name(ch))
                         for ch in sorted(new_chars))
                     self.tag('unusual-character-in-translation',
                              message_repr(message, template='{}:'),
                              tags.safestr(names))
                     reported_unusual_chars |= new_chars
         if flags.fuzzy:
             continue
         for msgstr in translations:
             found = gettext.search_for_conflict_marker(msgstr)
             if found is not None:
                 marker_text = found.group(0)
                 self.tag('conflict-marker-in-translation',
                          message_repr(message), marker_text)
                 break
         if has_msgstr_plural and not all(message.msgstr_plural.values()):
             self.tag('partially-translated-message', message_repr(message))
     if not definitions_seen:
         hidden_strings = (isinstance(ctx.file, polib.MOFile)
                           and ctx.file.possible_hidden_strings)
         if not hidden_strings:
             self.tag('empty-file')
예제 #7
0
 def check_headers(self, ctx):
     """Check the header entry and collect its fields into ``ctx.metadata``.

     The first live header entry is parsed with ``gettext.parse_header``;
     a second one is tagged ``duplicate-header-entry`` and ends the scan.
     Tags are also emitted for source references or plural forms on the
     header entry, unexpected or duplicated flags, a header entry that is
     not the first entry, unusual characters, stray lines, conflict
     markers, unknown field names and duplicated fields.

     On return ``ctx.metadata`` maps each field name to the list of its
     values, and the ``metadata`` / ``metadata_is_fuzzy`` attributes of
     ``ctx.file`` have been deleted.
     """
     fields = collections.defaultdict(list)
     stray_lines = []
     ctx.file.header_entry = None
     header_seen = False
     for entry in ctx.file:
         if not is_header_entry(entry) or entry.obsolete:
             continue
         if header_seen:
             self.tag('duplicate-header-entry')
             break
         header_seen = True
         if entry.occurrences:
             references = [':'.join((path, line))
                           for path, line in entry.occurrences]
             self.tag('empty-msgid-message-with-source-code-references',
                      *references)
         if entry.msgid_plural is not None:
             self.tag('empty-msgid-message-with-plural-forms')
         # At least in polib 1.0.0, if the source PO file is empty,
         # msgstr for the header entry is None.
         header_text = entry.msgstr_plural.get(0, entry.msgstr) or ''
         for item in gettext.parse_header(header_text):
             if isinstance(item, dict):
                 [(key, value)] = item.items()
                 fields[key].append(value)
             else:
                 stray_lines.append(item)
         flag_counts = collections.Counter(entry.flags)
         for flag, count in sorted(flag_counts.items()):
             if flag == 'fuzzy':
                 if not ctx.is_template:
                     self.tag('fuzzy-header-entry')
             elif difflib.get_close_matches(flag.lower(), ['fuzzy'],
                                            cutoff=0.8):
                 # Probably a misspelling of "fuzzy": suggest the fix.
                 self.tag('unexpected-flag-for-header-entry', flag, '=>',
                          'fuzzy')
             else:
                 self.tag('unexpected-flag-for-header-entry', flag)
             if count > 1:
                 self.tag('duplicate-flag-for-header-entry', flag)
         if entry is not ctx.file[0]:
             self.tag('distant-header-entry')
         unusual = set(find_unusual_characters(header_text))
         if unusual:
             names = ', '.join(
                 'U+{:04X} {}'.format(ord(ch),
                                      encinfo.get_character_name(ch))
                 for ch in sorted(unusual))
             self.tag('unusual-character-in-header-entry',
                      tags.safestr(names))
     conflict_marker_reported = False
     for line in stray_lines:
         if not gettext.search_for_conflict_marker(line):
             self.tag('stray-header-line', line)
         elif not conflict_marker_reported:
             # Only the first conflict marker is reported.
             self.tag('conflict-marker-in-header-entry', line)
             conflict_marker_reported = True
     known_fields = frozenset(gettext.header_fields)
     known_fields_lc = {name.lower(): name for name in known_fields}
     for key, values in sorted(fields.items()):
         if not key.startswith(('X-', 'x-')) and key not in known_fields:
             # Look for a known field that differs only by case, then for
             # a close match.
             suggestion = known_fields_lc.get(key.lower())
             if suggestion is None:
                 close = difflib.get_close_matches(key,
                                                   known_fields,
                                                   n=1,
                                                   cutoff=0.8)
                 if close:
                     [suggestion] = close
             # Don't suggest a field that the header already contains.
             if suggestion in fields:
                 suggestion = None
             if suggestion is None:
                 self.tag('unknown-header-field', key)
             else:
                 self.tag('unknown-header-field', key, '=>', suggestion)
         if (len(values) > 1
                 and key not in header_fields_with_dedicated_checks):
             self.tag('duplicate-header-field', key)
     ctx.metadata = fields
     del ctx.file.metadata
     del ctx.file.metadata_is_fuzzy
예제 #8
0
 def check_mime(self, ctx):
     """Check the MIME header fields and determine the file's encoding.

     Validates MIME-Version, Content-Transfer-Encoding and Content-Type,
     tagging missing, duplicated and invalid values, as well as unknown,
     non-ASCII-compatible and non-portable encodings.  If ``ctx.language``
     is set, characters the encoding cannot represent are tagged too.

     ``ctx.encoding`` is set to the encoding when the Content-Type values
     yield exactly one usable encoding, and to ``None`` otherwise.
     """
     ctx.encoding = None
     # MIME-Version:
     versions = ctx.metadata['MIME-Version']
     if not versions:
         self.tag('no-mime-version-header-field',
                  tags.safestr('MIME-Version: 1.0'))
     elif len(versions) > 1:
         self.tag('duplicate-header-field-mime-version')
         versions = sorted(set(versions))
     for version in versions:
         if version != '1.0':
             self.tag('invalid-mime-version', version, '=>', '1.0')
     # Content-Transfer-Encoding:
     transfer_encodings = ctx.metadata['Content-Transfer-Encoding']
     if not transfer_encodings:
         self.tag('no-content-transfer-encoding-header-field',
                  tags.safestr('Content-Transfer-Encoding: 8bit'))
     elif len(transfer_encodings) > 1:
         self.tag('duplicate-header-field-content-transfer-encoding')
         transfer_encodings = sorted(set(transfer_encodings))
     for transfer_encoding in transfer_encodings:
         if transfer_encoding != '8bit':
             self.tag('invalid-content-transfer-encoding',
                      transfer_encoding, '=>', '8bit')
     # Content-Type:
     hint_template = 'text/plain; charset=<encoding>'
     content_types = ctx.metadata['Content-Type']
     if not content_types:
         self.tag('no-content-type-header-field',
                  tags.safestr('Content-Type: ' + hint_template))
         return
     if len(content_types) > 1:
         self.tag('duplicate-header-field-content-type')
         content_types = sorted(set(content_types))
     usable_encodings = set()
     for ct in content_types:
         match = re.search(r'(\Atext/plain; )?\bcharset=([^\s;]+)\Z', ct)
         if not match:
             self.tag('invalid-content-type', ct, '=>', hint_template)
             continue
         encoding = match.group(2)
         try:
             ascii_compatible = encinfo.is_ascii_compatible_encoding(
                 encoding, missing_ok=False)
         except encinfo.EncodingLookupError:
             if encoding != 'CHARSET':
                 self.tag('unknown-encoding', encoding)
             elif not ctx.is_template:
                 # The "CHARSET" placeholder is accepted in templates.
                 self.tag('boilerplate-in-content-type', ct)
             encoding = None
         else:
             if not ascii_compatible:
                 self.tag('non-ascii-compatible-encoding', encoding)
             elif not encinfo.is_portable_encoding(encoding):
                 replacement = encinfo.propose_portable_encoding(encoding)
                 if replacement is None:
                     self.tag('non-portable-encoding', encoding)
                 else:
                     self.tag('non-portable-encoding', encoding, '=>',
                              replacement)
                     # Continue with the proposed replacement.
                     encoding = replacement
             if ctx.language is not None:
                 unrepresentable = (
                     ctx.language.get_unrepresentable_characters(encoding))
                 if unrepresentable:
                     # Abbreviate long lists: first four, then "...".
                     if len(unrepresentable) > 5:
                         unrepresentable[4:] = ['...']
                     self.tag('unrepresentable-characters', encoding,
                              *unrepresentable)
         if match.group(1) is None:
             # A charset was found, but not in the exact
             # "text/plain; charset=..." form.
             hint = hint_template
             if encoding is not None:
                 hint = hint_template.replace('<encoding>', encoding)
             self.tag('invalid-content-type', ct, '=>', hint)
         if encoding is not None:
             usable_encodings.add(encoding)
     if len(usable_encodings) == 1:
         [ctx.encoding] = usable_encodings
예제 #9
0
 def check_plurals(self, ctx):
     """Check the Plural-Forms header field against the file's messages.

     Tags duplicated, missing, unparsable or unusual Plural-Forms values,
     inconsistent or incorrect numbers of plural forms, and plural
     expressions that overflow, divide by zero, or produce values outside
     ``range(nplurals)`` or never produce some of them.

     ``ctx.plural_preimage`` is set to a dict mapping each plural-form
     index to the list of ``i`` in ``range(200)`` that the expression maps
     to it.  It is left as ``None`` when that mapping could not be
     computed or any codomain problem was found.
     """
     ctx.plural_preimage = None
     plural_forms = ctx.metadata['Plural-Forms']
     if len(plural_forms) > 1:
         self.tag('duplicate-header-field-plural-forms')
         plural_forms = sorted(set(plural_forms))
         if len(plural_forms) > 1:
             # Conflicting values: there is nothing sensible to check.
             return
     if len(plural_forms) == 1:
         [plural_forms] = plural_forms
     else:
         assert len(plural_forms) == 0
         plural_forms = None
     correct_plural_forms = None
     if ctx.language is not None:
         correct_plural_forms = ctx.language.get_plural_forms()
     has_plurals = False  # messages with plural forms (translated or not)?
     expected_nplurals = {}  # number of plurals in _translated_ messages
     for message in ctx.file:
         if message.obsolete:
             continue
         if message.msgid_plural is not None:
             has_plurals = True
             if not message.translated():
                 continue
             expected_nplurals[len(message.msgstr_plural)] = message
             if len(expected_nplurals) > 1:
                 # Two different counts are enough to report the problem.
                 break
     if len(expected_nplurals) > 1:
         args = []
         for n, message in sorted(expected_nplurals.items()):
             args += [n, message_repr(message, template='({})'), '!=']
         # Drop the trailing '!=' separator.
         self.tag('inconsistent-number-of-plural-forms', *args[:-1])
     if ctx.is_template:
         plural_forms_hint = 'nplurals=INTEGER; plural=EXPRESSION;'
     elif correct_plural_forms:
         plural_forms_hint = tags.safe_format(
             ' or '.join('{}' for s in correct_plural_forms),
             *correct_plural_forms)
     else:
         plural_forms_hint = 'nplurals=<n>; plural=<expression>'
     if plural_forms is None:
         if has_plurals:
             if expected_nplurals:
                 self.tag('no-required-plural-forms-header-field',
                          plural_forms_hint)
             else:
                 self.tag('no-plural-forms-header-field', plural_forms_hint)
         return
     if ctx.is_template:
         return
     try:
         (n, expr, ljunk, rjunk) = gettext.parse_plural_forms(plural_forms,
                                                              strict=False)
     except gettext.PluralFormsSyntaxError:
         if has_plurals:
             self.tag('syntax-error-in-plural-forms', plural_forms, '=>',
                      plural_forms_hint)
         else:
             self.tag('syntax-error-in-unused-plural-forms', plural_forms,
                      '=>', plural_forms_hint)
         return
     if ljunk:
         self.tag('leading-junk-in-plural-forms', ljunk)
     if rjunk:
         self.tag('trailing-junk-in-plural-forms', rjunk)
     if len(expected_nplurals) == 1:
         [expected_nplurals] = expected_nplurals.keys()
         if n != expected_nplurals:
             self.tag('incorrect-number-of-plural-forms', n,
                      tags.safestr('(Plural-Forms header field)'), '!=',
                      expected_nplurals,
                      tags.safestr('(number of msgstr items)'))
     locally_correct_n = locally_correct_expr = None
     if correct_plural_forms is not None:
         # Keep only the known-correct forms with the same nplurals.
         locally_correct_plural_forms = [
             (i, expression) for i, expression in map(
                 gettext.parse_plural_forms, correct_plural_forms) if i == n
         ]
         if not locally_correct_plural_forms:
             if has_plurals:
                 self.tag('unusual-plural-forms', plural_forms, '=>',
                          plural_forms_hint)
             else:
                 self.tag('unusual-unused-plural-forms', plural_forms, '=>',
                          plural_forms_hint)
         elif len(locally_correct_plural_forms) == 1:
             [[locally_correct_n,
               locally_correct_expr]] = locally_correct_plural_forms
     plural_preimage = collections.defaultdict(list)
     unusual_plural_forms = False
     codomain_limit = 200
     try:
         for i in range(codomain_limit):
             fi = expr(i)
             if fi >= n:
                 message = tags.safe_format('f({}) = {} >= {}'.format(
                     i, fi, n))
                 if has_plurals:
                     self.tag('codomain-error-in-plural-forms', message)
                 else:
                     self.tag('codomain-error-in-unused-plural-forms',
                              message)
                 break
             plural_preimage[fi] += [i]
             if (n == locally_correct_n) and (fi !=
                                              locally_correct_expr(i)) and (
                                                  not unusual_plural_forms):
                 if has_plurals:
                     self.tag('unusual-plural-forms', plural_forms, '=>',
                              plural_forms_hint)
                 else:
                     self.tag('unusual-unused-plural-forms', plural_forms,
                              '=>', plural_forms_hint)
                 unusual_plural_forms = True
         else:
             # Only reached when the loop finished without ``break``.
             ctx.plural_preimage = dict(plural_preimage)
     except OverflowError:
         message = tags.safe_format('f({}): integer overflow', i)
         if has_plurals:
             self.tag('arithmetic-error-in-plural-forms', message)
         else:
             self.tag('arithmetic-error-in-unused-plural-forms', message)
     except ZeroDivisionError:
         message = tags.safe_format('f({}): division by zero', i)
         if has_plurals:
             self.tag('arithmetic-error-in-plural-forms', message)
         else:
             self.tag('arithmetic-error-in-unused-plural-forms', message)
     # Bind the list unconditionally: it is read below even when
     # expr.codomain() returns None, which used to raise
     # UnboundLocalError.
     uncov_rngs = []
     codomain = expr.codomain()
     if codomain is not None:
         (x, y) = codomain
         if x > 0:
             uncov_rngs += [range(0, x)]
         if y + 1 < n:
             uncov_rngs += [range(y + 1, n)]
     # NOTE(review): with an unknown codomain only the preimage-based gap
     # check below can find uncovered values -- confirm this is intended.
     if (not uncov_rngs) and (ctx.plural_preimage is not None):
         period = expr.period()
         if period is None:
             # 1e999 is float infinity, so the comparison below fails.
             period = (0, 1e999)
         if sum(period) < codomain_limit:
             for i in sorted(ctx.plural_preimage):
                 if (i > 0) and (i - 1 not in ctx.plural_preimage):
                     uncov_rngs += [range(i - 1, i)]
                     break
                 if (i + 1 < n) and (i + 1 not in ctx.plural_preimage):
                     uncov_rngs += [range(i + 1, i + 2)]
                     break
     for rng in uncov_rngs:
         rng = misc.format_range(rng, max=5)
         message = tags.safestr('f(x) != {}'.format(rng))
         if has_plurals:
             self.tag('codomain-error-in-plural-forms', message)
         else:
             self.tag('codomain-error-in-unused-plural-forms', message)
         ctx.plural_preimage = None
예제 #10
0
 def check_language(self, ctx):
     """Determine the translation language and store it in ``ctx.language``.

     Candidate sources, in order: the ``language`` command-line option,
     the directory directly above ``LC_MESSAGES`` in the path, the
     basename of a ``.po`` file, the Language header field, and the
     X-Poedit-Language header field.  Disagreements between sources are
     tagged ``language-disparity``; invalid, missing or duplicated header
     values get their own tags.

     For templates only the presence of the Language field is checked and
     ``ctx.language`` stays ``None``.  It also stays ``None`` when no
     source yields a language.
     """
     ctx.language = None
     duplicate_meta_language = False
     meta_languages = ctx.metadata['Language']
     if len(meta_languages) > 1:
         self.tag('duplicate-header-field-language')
         meta_languages = sorted(set(meta_languages))
         if len(meta_languages) > 1:
             duplicate_meta_language = True
     if len(meta_languages) == 1:
         [meta_language] = meta_languages
     else:
         meta_language = None
     # Keep the raw header value for use in tags.
     orig_meta_language = meta_language
     if ctx.is_template:
         if meta_language is None:
             self.tag('no-language-header-field')
         return
     language = self.options.language
     language_source = 'command-line'
     language_source_quality = 1
     if language is None:
         # Try the directory directly above LC_MESSAGES.
         path_components = os.path.normpath(self.path).split('/')
         try:
             i = path_components.index('LC_MESSAGES')
         except ValueError:
             i = 0
         if i > 0:
             language = path_components[i - 1]
             try:
                 language = ling.parse_language(language)
                 language.fix_codes()
                 language.remove_encoding()
                 language.remove_nonlinguistic_modifier()
             except ling.LanguageError:
                 # It's not our job to report possible errors in _pathnames_.
                 language = None
             else:
                 language_source = 'pathname'
         del path_components, i
     if language is None and self.path.endswith('.po'):
         # Fall back to the basename; this is a lower-quality guess.
         language, ext = os.path.splitext(os.path.basename(self.path))
         assert ext == '.po'
         try:
             language = ling.parse_language(language)
             if language.encoding is not None:
                 # It's very likely that something else has been confused
                 # for the apparent encoding.
                 raise ling.LanguageError
             language.fix_codes()
             language.remove_nonlinguistic_modifier()
         except ling.LanguageError:
             # It's not our job to report possible errors in _pathnames_.
             language = None
         else:
             language_source = 'pathname'
             language_source_quality = 0
     if meta_language:
         try:
             meta_language = ling.parse_language(meta_language)
         except ling.LanguageError:
             # Not a language code; maybe it is a language name.
             try:
                 new_meta_language = ling.get_language_for_name(
                     meta_language)
             except LookupError:
                 new_meta_language = None
             if new_meta_language:
                 self.tag('invalid-language', orig_meta_language, '=>',
                          new_meta_language)
             else:
                 self.tag('invalid-language', orig_meta_language)
             meta_language = new_meta_language
     if meta_language:
         if meta_language.remove_encoding():
             self.tag('encoding-in-language-header-field',
                      orig_meta_language)
         if meta_language.remove_nonlinguistic_modifier():
             self.tag('language-variant-does-not-affect-translation',
                      orig_meta_language)
         try:
             if meta_language.fix_codes():
                 self.tag('invalid-language', orig_meta_language, '=>',
                          meta_language)
         except ling.LanguageError:
             self.tag('invalid-language', orig_meta_language)
             meta_language = None
         # Skip this when fix_codes() failed: formatting None would look
         # for a literal "/None/" path component.
         if (meta_language is not None
                 and language_source_quality <= 0
                 and ('/{lang}/'.format(lang=meta_language) in self.path
                      or '/{lang}/'.format(
                          lang=str(meta_language).replace('_', '-'))
                      in self.path)):
             # For LibreOffice, PO basename does not designate translation
             # language, but one of the path components does.
             # For example,
             #   translations/source/da/dictionaries/pl_PL.po
             # is a Danish translation.
             language = None
     if meta_language:
         if language is None:
             language = meta_language
             language_source = 'Language header field'
         elif language != meta_language:
             self.tag('language-disparity', language,
                      tags.safestr('({})'.format(language_source)),
                      '!=', meta_language,
                      tags.safestr('(Language header field)'))
     poedit_languages = ctx.metadata['X-Poedit-Language']
     if len(poedit_languages) > 1:
         self.tag('duplicate-header-field-x-poedit', 'X-Poedit-Language')
         poedit_languages = sorted(set(poedit_languages))
     poedit_countries = ctx.metadata['X-Poedit-Country']
     if len(poedit_countries) > 1:
         self.tag('duplicate-header-field-x-poedit', 'X-Poedit-Country')
         poedit_countries = sorted(set(poedit_countries))
     if len(poedit_languages) == 1 and len(poedit_countries) <= 1:
         [poedit_language] = poedit_languages
         # FIXME: This should take also X-Poedit-Country into account.
         try:
             poedit_language = ling.get_language_for_name(poedit_language)
         except LookupError:
             self.tag('unknown-poedit-language', poedit_language)
         else:
             if language is None:
                 language = poedit_language
                 language_source = 'X-Poedit-Language header field'
             elif language.language_code != poedit_language.language_code:
                 self.tag('language-disparity', language,
                          tags.safestr('({})'.format(language_source)),
                          '!=', poedit_language,
                          tags.safestr('(X-Poedit-Language header field)'))
     if language is None:
         if not orig_meta_language and not duplicate_meta_language:
             self.tag('no-language-header-field')
         self.tag('unable-to-determine-language')
         return
     if not orig_meta_language and not duplicate_meta_language:
         # The field is missing but the language is known: suggest it.
         self.tag('no-language-header-field', tags.safestr('Language:'),
                  language)
     ctx.language = language
예제 #11
0
 def check(self):
     """Load the file at ``self.path`` and run every check against it.

     The file type comes from ``self.options.file_type`` when it is set,
     and from the extension of ``self.path`` otherwise.  ``.po`` and
     ``.pot`` files are loaded with ``polib.pofile`` (``.pot`` also marks
     the file as a template); ``.mo`` and ``.gmo`` files are loaded with
     ``polib.mofile``.  Any other type is tagged ``unknown-file-type``.

     Loading problems are reported as tags (``os-error``,
     ``invalid-mo-file``, ``syntax-error-in-po-file``) and end the check
     early; an ``IOError`` that fits none of these is re-raised.  When the
     file cannot be decoded, it is loaded again as ISO-8859-1 and tagged
     ``broken-encoding``.
     """
     # If a file passed to polib doesn't exist, it will “helpfully” treat it
     # as PO/MO file _contents_. This is definitely not what we want. To
     # prevent such a disaster, fail early if the file doesn't exist.
     try:
         os.stat(self.path)
     except EnvironmentError as exc:
         self.tag('os-error', tags.safestr(exc.strerror))
         return
     forced_type = self.options.file_type
     if forced_type is not None:
         extension = '.' + forced_type
     else:
         extension = os.path.splitext(self.path)[-1]
     if extension in ('.po', '.pot'):
         constructor = polib.pofile
     elif extension in ('.mo', '.gmo'):
         constructor = polib.mofile
     else:
         self.tag('unknown-file-type')
         return
     is_template = extension == '.pot'
     # Holds the UnicodeDecodeError raised by the first load attempt, if any.
     decode_error = None
     try:
         try:
             parsed = constructor(self.path)
         except UnicodeDecodeError as exc:
             decode_error = exc
             # Retry with an encoding that can decode any byte sequence.
             parsed = constructor(self.path, encoding='ISO-8859-1')
     except polib4us.moparser.SyntaxError as exc:
         self.tag('invalid-mo-file', tags.safestr(exc))
         return
     except IOError as exc:
         text = str(exc)
         if exc.errno is not None:
             self.tag('os-error', tags.safestr(exc.strerror))
             return
         syntax_prefix = 'Syntax error in po file '
         if not text.startswith(syntax_prefix):
             raise
         # Reduce the error text to "line N[: reason]" by dropping the fixed
         # prefix and, when present, the file path that follows it.
         text = text[len(syntax_prefix):]
         path_prefix = self.path + ' '
         if text.startswith(path_prefix):
             text = text[len(path_prefix):]
         details = []
         match = re.match(r'^\(line ([0-9]+)\)(?:: (.+))?$', text)
         if match is not None:
             where = 'line {}'.format(match.group(1))
             text = match.group(2)
             if text is not None:
                 where += ':'
                 # Only reasons made of plain lowercase words are marked
                 # as safe strings.
                 if re.match(r'^[a-z]+( [a-z]+)*$', text):
                     text = tags.safestr(text)
             details.append(tags.safestr(where))
         if text is not None:
             details.append(text)
         self.tag('syntax-error-in-po-file', *details)
         return
     finally:
         # Runs on every exit path above, so the encoding problem is
         # reported even when the fallback load failed as well.
         if decode_error is not None:
             # pylint: disable=no-member
             raw = decode_error.object
             assert isinstance(raw, bytes)
             offset = decode_error.start
             excerpt = raw[max(offset - 40, 0):offset + 40]
             self.tag(
                 'broken-encoding',
                 excerpt,
                 tags.safestr('cannot be decoded as'),
                 decode_error.encoding.upper(),
             )
             # pylint: enable=no-member
     ctx = misc.Namespace()
     ctx.file = parsed
     ctx.is_template = is_template
     self.check_comments(ctx)
     self.check_headers(ctx)
     self.check_language(ctx)
     self.check_plurals(ctx)
     self.check_mime(ctx)
     if decode_error is not None:
         # The declared encoding was unusable; reset it after check_mime
         # and before the remaining checks.
         ctx.encoding = None
     self.check_dates(ctx)
     self.check_project(ctx)
     self.check_translator(ctx)
     self.check_messages(ctx)
예제 #12
0
 def check_args(self,
                message,
                src_loc,
                src_fmt,
                dst_loc,
                dst_fmt,
                *,
                omitted_int_conv_ok=False):
     """Compare the arguments of two parsed format strings and tag differences.

     ``src_fmt`` and ``dst_fmt`` must provide ``seq_arguments`` (unnamed
     arguments) and ``map_arguments`` (a mapping from argument name to a
     sequence of arguments).  ``src_loc`` and ``dst_loc`` are the labels
     used in the emitted tags to say where each format string came from.

     Tags are emitted, in this order, for: a different number of unnamed
     arguments, type mismatches between unnamed arguments at the same
     position, type mismatches between the first occurrences of named
     arguments present on both sides, named arguments only in ``dst_fmt``,
     and named arguments only in ``src_fmt``.

     When ``omitted_int_conv_ok`` is true, a single named argument missing
     from ``dst_fmt`` is not reported if all of its occurrences in
     ``src_fmt`` have type ``'int'``.
     """
     prefix = message_repr(message, template='{}:')
     dst_label = tags.safestr('({})'.format(dst_loc))
     src_label = tags.safestr('({})'.format(src_loc))

     def tag_type_mismatch(src_arg, dst_arg):
         # Report one source/destination pair whose types disagree.
         if src_arg.type != dst_arg.type:
             self.tag(
                 'python-format-string-argument-type-mismatch',
                 prefix,
                 tags.safestr(dst_arg.type),
                 dst_label,
                 '!=',
                 tags.safestr(src_arg.type),
                 src_label,
             )

     # unnamed arguments:
     src_seq = src_fmt.seq_arguments
     dst_seq = dst_fmt.seq_arguments
     n_src = len(src_seq)
     n_dst = len(dst_seq)
     if n_dst != n_src:
         self.tag(
             'python-format-string-argument-number-mismatch',
             prefix,
             n_dst,
             dst_label,
             '!=',
             n_src,
             src_label,
         )
     # Positions present on only one side are covered by the count tag.
     for src_arg, dst_arg in zip(src_seq, dst_seq):
         tag_type_mismatch(src_arg, dst_arg)

     # named arguments:
     src_map = src_fmt.map_arguments
     dst_map = dst_fmt.map_arguments
     src_names = src_map.keys()
     dst_names = dst_map.keys()
     for name in sorted(dst_names & src_names):
         # Only the first occurrence on each side is compared.
         tag_type_mismatch(src_map[name][0], dst_map[name][0])
     for name in sorted(dst_names - src_names):
         self.tag(
             'python-format-string-unknown-argument',
             prefix,
             name,
             tags.safestr('in'),
             tags.safestr(dst_loc),
             tags.safestr('but not in'),
             tags.safestr(src_loc),
         )
     absent = src_names - dst_names
     if omitted_int_conv_ok and len(absent) == 1:
         [only_name] = absent
         if all(arg.type == 'int' for arg in src_map[only_name]):
             absent = set()
     for name in sorted(absent):
         self.tag(
             'python-format-string-missing-argument',
             prefix,
             name,
             tags.safestr('not in'),
             tags.safestr(dst_loc),
             tags.safestr('while in'),
             tags.safestr(src_loc),
         )