Ejemplo n.º 1
0
 def check_summary(self, pkg, lang, ignored_words):
     summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang)
     if use_utf8:
         if not Pkg.is_utf8_bytestr(summary):
             printError(pkg, 'tag-not-utf8', 'Summary', lang)
         summary = Pkg.to_unicode(summary)
     else:
         summary = Pkg.b2s(summary)
     self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
     spell_check(pkg, summary, 'Summary(%s)', lang, ignored_words)
     if '\n' in summary:
         printError(pkg, 'summary-on-multiple-lines', lang)
     if summary[0] != summary[0].upper():
         printWarning(pkg, 'summary-not-capitalized', lang, summary)
     if summary[-1] == '.':
         printWarning(pkg, 'summary-ended-with-dot', lang, summary)
     if len(summary) > max_line_len:
         printError(pkg, 'summary-too-long', lang, summary)
     if leading_space_regex.search(summary):
         printError(pkg, 'summary-has-leading-spaces', lang, summary)
     res = forbidden_words_regex.search(summary)
     if res and Config.getOption('ForbiddenWords'):
         printWarning(pkg, 'summary-use-invalid-word', lang, res.group(1))
     if pkg.name:
         sepchars = r'[\s%s]' % punct
         res = re.search(r'(?:^|\s)(%s)(?:%s|$)' %
                         (re.escape(pkg.name), sepchars),
                         summary, re.IGNORECASE | re.UNICODE)
         if res:
             printWarning(pkg, 'name-repeated-in-summary', lang,
                          res.group(1))
Ejemplo n.º 2
0
 def check_summary(self, pkg, lang, ignored_words):
     summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang)
     if use_utf8:
         if not Pkg.is_utf8_bytestr(summary):
             printError(pkg, 'tag-not-utf8', 'Summary', lang)
         summary = Pkg.to_unicode(summary)
     else:
         summary = Pkg.b2s(summary)
     self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
     spell_check(pkg, summary, 'Summary(%s)', lang, ignored_words)
     if '\n' in summary:
         printError(pkg, 'summary-on-multiple-lines', lang)
     if summary[0] != summary[0].upper():
         printWarning(pkg, 'summary-not-capitalized', lang, summary)
     if summary[-1] == '.':
         printWarning(pkg, 'summary-ended-with-dot', lang, summary)
     if len(summary) > max_line_len:
         printError(pkg, 'summary-too-long', lang, summary)
     if leading_space_regex.search(summary):
         printError(pkg, 'summary-has-leading-spaces', lang, summary)
     res = forbidden_words_regex.search(summary)
     if res and Config.getOption('ForbiddenWords'):
         printWarning(pkg, 'summary-use-invalid-word', lang, res.group(1))
     if pkg.name:
         sepchars = '[\s' + punct + ']'
         res = re.search('(?:^|\s)(%s)(?:%s|$)' %
                         (re.escape(pkg.name), sepchars),
                         summary, re.IGNORECASE | re.UNICODE)
         if res:
             printWarning(pkg, 'name-repeated-in-summary', lang,
                          res.group(1))
Ejemplo n.º 3
0
 def check_description(self, pkg, lang, ignored_words):
     description = pkg.langtag(rpm.RPMTAG_DESCRIPTION, lang)
     if use_utf8:
         if not Pkg.is_utf8_bytestr(description):
             printError(pkg, 'tag-not-utf8', '%description', lang)
         description = Pkg.to_unicode(description)
     else:
         description = Pkg.b2s(description)
     self._unexpanded_macros(pkg, '%%description -l %s' % lang, description)
     spell_check(pkg, description, '%%description -l %s', lang,
                 ignored_words)
     for l in description.splitlines():
         if len(l) > max_line_len:
             printError(pkg, 'description-line-too-long', lang, l)
         res = forbidden_words_regex.search(l)
         if res and Config.getOption('ForbiddenWords'):
             printWarning(pkg, 'description-use-invalid-word', lang,
                          res.group(1))
         res = tag_regex.search(l)
         if res:
             printWarning(pkg, 'tag-in-description', lang, res.group(1))
Ejemplo n.º 4
0
 def check_description(self, pkg, lang, ignored_words):
     description = pkg.langtag(rpm.RPMTAG_DESCRIPTION, lang)
     if use_utf8:
         if not Pkg.is_utf8_bytestr(description):
             printError(pkg, 'tag-not-utf8', '%description', lang)
         description = Pkg.to_unicode(description)
     else:
         description = Pkg.b2s(description)
     self._unexpanded_macros(pkg, '%%description -l %s' % lang, description)
     spell_check(pkg, description, '%%description -l %s', lang,
                 ignored_words)
     for l in description.splitlines():
         if len(l) > max_line_len:
             printError(pkg, 'description-line-too-long', lang, l)
         res = forbidden_words_regex.search(l)
         if res and Config.getOption('ForbiddenWords'):
             printWarning(pkg, 'description-use-invalid-word', lang,
                          res.group(1))
         res = tag_regex.search(l)
         if res:
             printWarning(pkg, 'tag-in-description', lang, res.group(1))
Ejemplo n.º 5
0
def spell_check(pkg, str, fmt, lang, ignored):

    dict_found = True
    warned = set()
    if enchant:
        if lang == 'C':
            lang = 'en_US'

        checker = _enchant_checkers.get(lang)
        if not checker and lang not in _enchant_checkers:
            try:
                checker = enchant.checker.SpellChecker(
                    lang, filters=[enchant.tokenize.EmailFilter,
                                   enchant.tokenize.URLFilter,
                                   enchant.tokenize.WikiWordFilter])
            except enchant.DictNotFoundError:
                printInfo(pkg, 'enchant-dictionary-not-found', lang)
                pass
            _enchant_checkers[lang] = checker

        if checker:
            # squeeze whitespace to ease leading context check
            checker.set_text(re.sub(r'\s+', ' ', str))
            if use_utf8:
                uppername = Pkg.to_unicode(pkg.header[rpm.RPMTAG_NAME]).upper()
            else:
                uppername = pkg.name.upper()
            upperparts = uppername.split('-')
            if lang.startswith('en'):
                ups = [x + "'S" for x in upperparts]
                upperparts.extend(ups)
            for err in checker:

                # Skip already warned and ignored words
                if err.word in warned or err.word in ignored:
                    continue

                # Skip all capitalized words that do not start a sentence
                if err.word[0].isupper() and not \
                        sentence_break_regex.search(checker.leading_context(3)):
                    continue

                upperword = err.word.upper()

                # Skip all uppercase words
                if err.word == upperword:
                    continue

                # Skip errors containing package name or equal to a
                # "component" of it, case insensitively
                if uppername in upperword or upperword in upperparts:
                    continue

                # Work around enchant's digit tokenizing behavior:
                # http://github.com/rfk/pyenchant/issues/issue/3
                if checker.leading_context(1).isdigit() or \
                        checker.trailing_context(1).isdigit():
                    continue

                # Warn and suggest
                sug = ', '.join(checker.suggest()[:3])
                if sug:
                    sug = '-> %s' % sug
                printWarning(pkg, 'spelling-error', fmt % lang, err.word, sug)
                warned.add(err.word)

        else:
            dict_found = False

    if not enchant or not dict_found:
        for seq in str.split():
            for word in re.split(r'[^a-z]+', seq.lower()):
                if len(word) == 0:
                    continue
                correct = BAD_WORDS.get(word)
                if not correct:
                    continue
                if word[0] == '\'':
                    word = word[1:]
                if word[-1] == '\'':
                    word = word[:-1]
                if word in warned or word in ignored:
                    continue
                printWarning(pkg, 'spelling-error', fmt % lang, word, '->',
                             correct)
                warned.add(word)
Ejemplo n.º 6
0
def spell_check(pkg, str, fmt, lang, ignored):

    dict_found = True
    warned = set()
    if enchant:
        if lang == 'C':
            lang = 'en_US'

        checker = _enchant_checkers.get(lang)
        if not checker and lang not in _enchant_checkers:
            try:
                checker = enchant.checker.SpellChecker(
                    lang, filters=[enchant.tokenize.EmailFilter,
                                   enchant.tokenize.URLFilter,
                                   enchant.tokenize.WikiWordFilter])
            except enchant.DictNotFoundError:
                printInfo(pkg, 'enchant-dictionary-not-found', lang)
                pass
            _enchant_checkers[lang] = checker

        if checker:
            # squeeze whitespace to ease leading context check
            checker.set_text(re.sub(r'\s+', ' ', str))
            if use_utf8:
                uppername = Pkg.to_unicode(pkg.header[rpm.RPMTAG_NAME]).upper()
            else:
                uppername = pkg.name.upper()
            upperparts = uppername.split('-')
            if lang.startswith('en'):
                ups = [x + "'S" for x in upperparts]
                upperparts.extend(ups)
            for err in checker:

                # Skip already warned and ignored words
                if err.word in warned or err.word in ignored:
                    continue

                # Skip all capitalized words that do not start a sentence
                if err.word[0].isupper() and not \
                        sentence_break_regex.search(checker.leading_context(3)):
                    continue

                upperword = err.word.upper()

                # Skip all uppercase words
                if err.word == upperword:
                    continue

                # Skip errors containing package name or equal to a
                # "component" of it, case insensitively
                if uppername in upperword or upperword in upperparts:
                    continue

                # Work around enchant's digit tokenizing behavior:
                # http://github.com/rfk/pyenchant/issues/issue/3
                if checker.leading_context(1).isdigit() or \
                        checker.trailing_context(1).isdigit():
                    continue

                # Warn and suggest
                sug = ', '.join(checker.suggest()[:3])
                if sug:
                    sug = '-> %s' % sug
                printWarning(pkg, 'spelling-error', fmt % lang, err.word, sug)
                warned.add(err.word)

        else:
            dict_found = False

    if not enchant or not dict_found:
        for seq in str.split():
            for word in re.split('[^a-z]+', seq.lower()):
                if len(word) == 0:
                    continue
                correct = BAD_WORDS.get(word)
                if not correct:
                    continue
                if word[0] == '\'':
                    word = word[1:]
                if word[-1] == '\'':
                    word = word[:-1]
                if word in warned or word in ignored:
                    continue
                printWarning(pkg, 'spelling-error', fmt % lang, word, '->',
                             correct)
                warned.add(word)