def isvalidaccelerator(accelerator, acceptlist=None):
    """returns whether the given accelerator character is valid

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as accelerators
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    assert isinstance(accelerator, unicode)
    assert isinstance(acceptlist, unicode) or acceptlist is None
    if len(accelerator) == 0:
        return False
    if acceptlist is not None:
        acceptlist = data.normalize(acceptlist)
        if accelerator in acceptlist:
            return True
        return False
    else:
        # Old code path - ensures that we don't get a large number of regressions
        accelerator = accelerator.replace("_", "")
        if accelerator in u"-?":
            return True
        if not accelerator.isalnum():
            return False
        # We don't want to have accelerators on characters with diacritics, so let's
        # see if the character can decompose.
        decomposition = unicodedata.decomposition(accelerator)
        # Next we strip out any extra information like <this>
        decomposition = re.sub("<[^>]+>", "", decomposition).strip()
        return decomposition.count(" ") == 0
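# Illustrative usage sketch (not part of the original module); assumes the
# surrounding imports (data, re, unicodedata) and the Python 2 `unicode` type:
#
#     isvalidaccelerator(u"a")                             # True: plain alphanumeric
#     isvalidaccelerator(u"\xe9")                          # False: decomposes to "e" + combining acute
#     isvalidaccelerator(u"\xe9", acceptlist=u"\xe9\xe8")  # True: explicitly allowed by acceptlist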
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, keeptranslations=False, accelchar=None,
             encoding='utf-8', max_matches=0):
    """Build a grep-style filter from the given search string and options."""
    if isinstance(searchstring, unicode):
        self.searchstring = searchstring
    else:
        self.searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(self.searchstring)
    if searchparts:
        # For now we still support the old terminology, except for the old 'source'
        # which has a new meaning now.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False
    self.ignorecase = ignorecase
    if self.ignorecase:
        self.searchstring = self.searchstring.lower()
    self.useregexp = useregexp
    if self.useregexp:
        self.searchpattern = re.compile(self.searchstring)
    self.invertmatch = invertmatch
    self.keeptranslations = keeptranslations
    self.accelchar = accelchar
    self.max_matches = max_matches
def real_index(string, nfc_index):
    """Calculate the real index in the unnormalized string that corresponds to
    the index nfc_index in the normalized string."""
    length = nfc_index
    max_length = len(string)
    while len(data.normalize(string[:length])) <= nfc_index:
        if length == max_length:
            return length
        length += 1
    return length - 1
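# Worked example (illustrative, not part of the original module); assumes
# data.normalize applies NFC normalization:
#
#     original = u"Fe\u0301e"               # "F", "e", combining acute, "e" (4 code points)
#     normalized = data.normalize(original)  # u"F\xe9e" (3 code points)
#     real_index(original, 2)                # -> 3: the trailing "e" is at index 3 in the original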
def accept_response(self, tmmodel, query_str, matches):
    """Accept a query-response from the model.
    (This method is used as Model-Controller communications)"""
    if not self.storecursor:
        # File closed since the query was started
        return
    query_str = forceunicode(query_str)
    if query_str != self.current_query or not matches:
        return
    # Perform some sanity checks on matches first
    for match in matches:
        if not isinstance(match.get('quality', 0), int):
            match['quality'] = int(match['quality'] or 0)
        if 'tmsource' not in match or match['tmsource'] is None:
            match['tmsource'] = tmmodel.display_name
        match['query_str'] = query_str

    anything_new = False
    for match in matches:
        curr_targets = [normalize(m['target']) for m in self.matches]
        if normalize(match['target']) not in curr_targets:
            # Let's insert at the end to prioritise existing matches over
            # new ones. We rely on the guarantee of sort stability. This
            # way an existing 100% will be above a new 100%.
            self.matches.append(match)
            anything_new = True
        else:
            norm_match_target = normalize(match['target'])
            prevmatch = [m for m in self.matches if normalize(m['target']) == norm_match_target][0]
            if 'quality' not in prevmatch or not prevmatch['quality']:
                # Matches without quality are assumed to be less appropriate
                # (ie. MT matches) than matches with an associated quality.
                self.matches.remove(prevmatch)
                self.matches.append(match)
                anything_new = True
    if not anything_new:
        return
    self.matches.sort(key=lambda x: 'quality' in x and x['quality'] or 0, reverse=True)
    self.matches = self.matches[:self.max_matches]

    # Only call display_matches if necessary:
    if self.matches:
        self.view.display_matches(self.matches)
def find_matches(unit, part, strings, re_search):
    """Return the GrepMatch objects where re_search matches in strings."""
    matches = []
    for n, string in enumerate(strings):
        if not string:
            continue
        normalized = data.normalize(string)
        for matchobj in re_search.finditer(normalized):
            start = real_index(string, matchobj.start())
            end = real_index(string, matchobj.end())
            matches.append(GrepMatch(unit, part=part, part_n=n, start=start, end=end))
    return matches
def matches(self, teststr):
    if teststr is None:
        return False
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        teststr = re.sub(self.accelchar + self.accelchar, "#", teststr)
        teststr = re.sub(self.accelchar, "", teststr)
    if self.useregexp:
        found = self.searchpattern.search(teststr)
    else:
        found = teststr.find(self.searchstring) != -1
    if self.invertmatch:
        found = not found
    return found
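# Illustrative example (not part of the original class); assumes this method and
# the __init__ above belong to the same filter class, called GrepFilter here:
#
#     f = GrepFilter(u"Exit", ['target'], accelchar=u"&")
#     f.matches(u"Save && E&xit")   # -> True
#
# The accelerator handling first rewrites the doubled "&&" as "#" and then strips
# the remaining single "&", so the string actually searched is u"Save # Exit".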
def find_matches(unit, part, strings, re_search):
    """Return the GrepMatch objects where re_search matches in strings."""
    matches = []
    for n, string in enumerate(strings):
        if not string:
            continue
        normalized = data.normalize(string)
        if normalized == string:
            # Normalization changed nothing, so indices map straight through and
            # the real_index() scan can be skipped.
            index_func = lambda s, i: i
        else:
            index_func = real_index
        for matchobj in re_search.finditer(normalized):
            start = index_func(string, matchobj.start())
            end = index_func(string, matchobj.end())
            matches.append(GrepMatch(unit, part=part, part_n=n, start=start, end=end))
    return matches
def __init__(
    self,
    searchstring,
    searchparts,
    ignorecase=False,
    useregexp=False,
    invertmatch=False,
    keeptranslations=False,
    accelchar=None,
    encoding="utf-8",
    max_matches=0,
):
    """Build a grep-style filter from the given search string and options."""
    if isinstance(searchstring, str):
        self.searchstring = searchstring
    else:
        self.searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(self.searchstring)
    if searchparts:
        # For now we still support the old terminology, except for the old 'source'
        # which has a new meaning now.
        self.search_source = ("source" in searchparts) or ("msgid" in searchparts)
        self.search_target = ("target" in searchparts) or ("msgstr" in searchparts)
        self.search_notes = ("notes" in searchparts) or ("comment" in searchparts)
        self.search_locations = "locations" in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False
    self.ignorecase = ignorecase
    if self.ignorecase:
        self.searchstring = self.searchstring.lower()
    self.useregexp = useregexp
    if self.useregexp:
        self.searchpattern = re.compile(self.searchstring)
    self.invertmatch = invertmatch
    self.keeptranslations = keeptranslations
    self.accelchar = accelchar
    self.max_matches = max_matches
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8', max_matches=0):
    """Build a grep-style filter from the given search string and options."""
    if isinstance(searchstring, unicode):
        self.searchstring = searchstring
    else:
        self.searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(self.searchstring)
    if searchparts:
        # For now we still support the old terminology, except for the old 'source'
        # which has a new meaning now.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False
    self.ignorecase = ignorecase
    if self.ignorecase:
        self.searchstring = self.searchstring.lower()
    self.useregexp = useregexp
    if self.useregexp:
        self.searchpattern = re.compile(self.searchstring)
    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.max_matches = max_matches
def run_checks(original, string, locale_code, disabled_checks=None):
    """Check for obvious errors like blanks and missing interpunction."""
    original = lang_data.normalize(original)
    string = lang_data.normalize(string)

    disabled_checks = disabled_checks or []

    unit = storage_base.TranslationUnit(original)
    unit.target = string
    checker = checks.StandardChecker(
        checkerconfig=checks.CheckerConfig(targetlanguage=locale_code),
        excludefilters=disabled_checks,
    )

    warnings = checker.run_filters(unit)

    if not warnings:
        return {}

    check_names = {
        "accelerators": "Accelerators",
        "blank": "Blank",
        "brackets": "Brackets",
        "compendiumconflicts": "Compendium conflict",
        "credits": "Translator credits",
        "doublequoting": "Double quotes",
        "doublespacing": "Double spaces",
        "doublewords": "Repeated word",
        "emails": "E-mail",
        "endpunc": "Ending punctuation",
        "endwhitespace": "Ending whitespace",
        "escapes": "Escapes",
        "filepaths": "File paths",
        "functions": "Functions",
        "long": "Long",
        "musttranslatewords": "Must translate words",
        "newlines": "Newlines",
        "nplurals": "Number of plurals",
        "notranslatewords": "Don't translate words",
        "numbers": "Numbers",
        "options": "Options",
        "printf": "Printf format string mismatch",
        "puncspacing": "Punctuation spacing",
        "purepunc": "Pure punctuation",
        "sentencecount": "Number of sentences",
        "short": "Short",
        "simplecaps": "Simple capitalization",
        "simpleplurals": "Simple plural(s)",
        "singlequoting": "Single quotes",
        "startcaps": "Starting capitalization",
        "startpunc": "Starting punctuation",
        "startwhitespace": "Starting whitespace",
        "tabs": "Tabs",
        "unchanged": "Unchanged",
        "urls": "URLs",
        "validchars": "Valid characters",
        "variables": "Placeholders",
        "xmltags": "XML tags",
    }

    warnings_array = []
    for key in warnings.keys():
        warning = check_names.get(key, key)
        warnings_array.append(warning)

    return {
        "ttWarnings": warnings_array,
    }
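# Illustrative call (not part of the original module); the exact warnings depend
# on which translate-toolkit filters fire for the given strings and locale:
#
#     run_checks("Hello, world!", "Hallo Welt", "de")
#     # -> e.g. {"ttWarnings": ["Ending punctuation"]} if only the endpunc check fires,
#     #    or {} when no check produces a warning.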