def replace_punct(self, token):
    """Replaces unicode punctuation marks with ones understood by ocamorph.

    Punctuation that cannot be represented in the analyzer's encoding is
    swapped for an ASCII stand-in: ``"`` for quotation marks, ``,`` for
    any other mark. Non-punctuation tokens, and punctuation that encodes
    cleanly, pass through unchanged.
    """
    try:
        # Probe-encode only: the encoded bytes are discarded; we just
        # want to know whether the round trip would succeed.
        if ispunct(token):
            token.encode(self._encoding)
    except UnicodeError:
        # Token is punctuation the target encoding cannot express.
        return '"' if isquot(token) else ','
    return token
def correct(self, analysis, original):
    """Inverts the xmlcharreplacements in the lemma, as well as
    replace_punct for unicode punctuation marks.

    @param analysis: a ``(word, analysis_string)`` pair; the analysis
        string is expected to hold three ``|``-separated fields
        (lemma, stuff, derivation).
    @param original: the original token, before replace_punct was applied.
    @return: a ``(word, corrected_analysis)`` pair.
    @raise ValueError: if the analysis string does not split into exactly
        three ``|``-separated fields (logged, then re-raised).
    """
    word, crap = analysis
    # A literal '|' token would confuse the field split below, so tag it
    # as punctuation before attempting to parse the analysis string.
    if original == u'|':
        return (word, original + u'||PUNCT')
    try:
        lemma, stuff, derivation = crap.split('|')
        # If the original is a punctuation mark, tag it as such to avoid
        # problems with |, etc. Also, we include the original character,
        # not the one possibly replaced by replace_punct.
        if ispunct(original):
            return (word, original + u'||PUNCT')
        # Word not in the morphtable, or POS tag could not be determined.
        if crap == u'unknown||':
            lemma = word
            derivation = u'UNKNOWN'
        # Undo XML character replacements: the odd-indexed pieces of the
        # pattern split are converted back to characters. Presumably they
        # are decimal code points -- the int() call supports that; the
        # pattern itself is defined elsewhere (TODO confirm).
        pieces = MorphAnalyzer.UNICODE_PATTERN.split(lemma)
        if len(pieces) > 1:
            for i in xrange(1, len(pieces), 2):
                pieces[i] = unichr(int(pieces[i]))
            lemma = u''.join(pieces)
        # Empty derivation: try to recover it from a 'lemma?.../TAG'
        # suffix embedded in the lemma itself.
        if len(derivation) == 0:
            parts = lemma.rsplit('?', 1)
            if len(parts) >= 2:
                lemma = parts[0]
                derivation = parts[1].rsplit('/', 1)[-1].upper()
        return (word, lemma + u'|' + stuff + u'|' + derivation)
    except ValueError as ve:
        logging.debug(ve)
        logging.debug(word + u" // " + crap)
        # Bare `raise` preserves the original traceback; `raise ve`
        # would reset it to this frame under Python 2.
        raise