Python PUNCTUATION_REGEX.match 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: textblob.utils

클래스/타입: PUNCTUATION_REGEX

메소드/함수: match

hotexamples.com에서의 예제들: 7

Python PUNCTUATION_REGEX.match - 7개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, 실제 코드에서 textblob.utils.PUNCTUATION_REGEX.match를 사용하는 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

match(4)

예제 #1

파일 보기

파일: __main__.py 프로젝트: wjt/nanogenmo

def reassemble(sentences):
    """Concatenate the fragments of all sentences into one string.

    A single space is emitted in front of a fragment unless the fragment is
    matched by ``PUNCTUATION_REGEX`` or no such fragment has been emitted
    yet since the beginning of the text / the last opening double quote.
    Double quotes are tracked as open/close pairs: an opening quote gets a
    leading space (except at the very start) and the fragment after it is
    attached without a space; a closing quote is appended directly after
    the preceding text.  The spacing state is carried across sentences and
    is never reset between them.

    ``Substitution`` fragments are rendered by unpacking them into the
    ``<del>{}</del><ins>{}</ins>`` template; every other fragment is
    appended unchanged.

    :param sentences: iterable of sentences, each an iterable of fragments.
    :returns: the joined text.
    """
    pieces = []
    fresh = True    # True until a non-quote fragment follows the start/an opening quote
    quoted = False  # True while inside an open double quote

    for sentence in sentences:
        for fragment in sentence:
            if fragment == '"':
                if not quoted:
                    # Opening quote: separate it from preceding text and make
                    # the next fragment attach to it without a space.
                    if not fresh:
                        pieces.append(' ')
                    fresh = True
                quoted = not quoted
            elif PUNCTUATION_REGEX.match(unicode(fragment)) or fresh:
                # Punctuation, or the first fragment after a start: no space.
                fresh = False
            else:
                pieces.append(' ')

            if isinstance(fragment, Substitution):
                rendered = '<del>{}</del><ins>{}</ins>'.format(*fragment)
            else:
                rendered = fragment
            pieces.append(rendered)

    return ''.join(pieces)

예제 #2

파일 보기

파일: taggers.py 프로젝트: DataReply/textblob-de

    def tag(self, sentence, tokenize=True):
        """Return ``(word, tag)`` pairs for `sentence`.

        :param sentence: Text to tag. Leading and trailing whitespace is
            ignored when checking for the empty and single-punctuation
            special cases.
        :param tokenize: (optional) If true, `sentence` is passed through
            ``self.tokenizer`` and re-joined with single spaces before
            tagging; if ``False`` it is handed to the tagger as given
            (expected to be a space separated string).
        :returns: ``[]`` for blank input; for input found in ``PUNCTUATION``
            a one-element list (or ``[]`` when ``self.include_punc`` is
            false); otherwise the result of ``pattern_tag``, with pairs
            whose tag is matched by ``PUNCTUATION_REGEX`` removed when
            ``self.include_punc`` is false.
        """
        stripped = sentence.strip()

        # Blank input produces no tags (Issue #3).
        if stripped == "":
            return []

        # Input that is found in PUNCTUATION bypasses the tagger (Issue #4).
        if stripped in PUNCTUATION:
            if not self.include_punc:
                return []
            # '.', '?' and '!' all receive the "." tag; anything else is
            # tagged with itself.
            punct_tag = "." if stripped in tuple('.?!') else stripped
            return [(stripped, punct_tag)]

        if tokenize:
            sentence = " ".join(self.tokenizer.tokenize(sentence))

        # The tagger always receives text that needs no further tokenizing:
        # either it was joined above or the caller supplied it that way.
        tagged = pattern_tag(sentence, tokenize=False)
        if self.include_punc:
            return tagged
        return [(word, t) for word, t in tagged
                if not PUNCTUATION_REGEX.match(unicode(t))]

예제 #3

파일 보기

파일: blob.py 프로젝트: Arttii/TextBlob

    def pos_tags(self):
        '''Return a list of ``(word, POS tag)`` tuples, without punctuation.

        ``self.raw`` is passed to ``self.pos_tagger.tag``.  Pairs whose tag
        is matched by ``PUNCTUATION_REGEX`` are skipped; for the remaining
        pairs the word is wrapped in ``Word`` (carrying the tag as
        ``pos_tag``) and the tag is converted with ``unicode``.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        '''
        tagged = []
        for token, pos in self.pos_tagger.tag(self.raw):
            if PUNCTUATION_REGEX.match(unicode(pos)):
                continue
            tagged.append((Word(token, pos_tag=pos), unicode(pos)))
        return tagged

예제 #4

파일 보기

    def pos_tags(self):
        """Return a list of ``(word, POS tag)`` tuples, without punctuation.

        The pairs produced by ``self.pos_tagger.tag(self.raw)`` are first
        filtered, dropping every pair whose tag is matched by
        ``PUNCTUATION_REGEX``; each surviving word is then wrapped in
        ``Word`` and its tag converted with ``unicode``.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        raw_pairs = self.pos_tagger.tag(self.raw)
        # Lazy filter stage: pairs are examined one at a time as the list
        # below is built.
        kept = (
            (word, t) for word, t in raw_pairs
            if not PUNCTUATION_REGEX.match(unicode(t))
        )
        return [(Word(word, pos_tag=t), unicode(t)) for word, t in kept]

예제 #5

파일 보기

파일: blob.py 프로젝트: sloria/TextBlob

    def pos_tags(self):
        """Return a list of ``(word, POS tag)`` tuples, without punctuation.

        For a ``TextBlob`` instance, the ``pos_tags`` attribute of every
        entry in ``self.sentences`` is collected first and the results are
        then concatenated in order.  Any other object is passed to
        ``self.pos_tagger.tag`` itself: pairs whose tag is matched by
        ``PUNCTUATION_REGEX`` are skipped and each remaining word is
        converted with ``unicode`` and wrapped in ``Word``.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        if not isinstance(self, TextBlob):
            tagged = []
            for word, t in self.pos_tagger.tag(self):
                if PUNCTUATION_REGEX.match(unicode(t)):
                    continue
                tagged.append((Word(unicode(word), pos_tag=t), unicode(t)))
            return tagged

        # Gather every sentence's tags before flattening them.
        per_sentence = [s.pos_tags for s in self.sentences]
        flattened = []
        for sentence_tags in per_sentence:
            flattened.extend(sentence_tags)
        return flattened

예제 #6

파일 보기

파일: blob.py 프로젝트: valadis6760/ai-app-chatbot

    def pos_tags(self):
        """Return a list of ``(word, POS tag)`` tuples, without punctuation.

        A ``TextBlob`` instance yields the concatenation, in order, of the
        ``pos_tags`` attribute of each entry in ``self.sentences`` (all of
        them are read before being merged).  Any other object is handed to
        ``self.pos_tagger.tag``; pairs whose tag is matched by
        ``PUNCTUATION_REGEX`` are removed, and each remaining word is
        wrapped in ``Word`` with its tag converted via ``unicode``.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        if isinstance(self, TextBlob):
            collected = [sentence.pos_tags for sentence in self.sentences]
            merged = []
            for chunk in collected:
                for pair in chunk:
                    merged.append(pair)
            return merged

        # Filter lazily, then build the output tuples from what is left.
        non_punct = (
            (word, t) for word, t in self.pos_tagger.tag(self)
            if not PUNCTUATION_REGEX.match(unicode(t))
        )
        return [(Word(word, pos_tag=t), unicode(t)) for word, t in non_punct]

예제 #7

파일 보기

파일: taggers.py 프로젝트: sayapapaya/German-Project-Backend

    def tag(self, sentence, tokenize=True):
        """Return ``(word, tag)`` pairs for `sentence`.

        :param sentence: Text to tag. Leading and trailing whitespace is
            ignored when checking for the empty and single-punctuation
            special cases.
        :param tokenize: (optional) If true, `sentence` is passed through
            ``self.tokenizer`` and re-joined with single spaces before
            tagging; if ``False`` it is handed to the tagger as given
            (expected to be a space separated string).
        :returns: ``[]`` for blank input; for input found in ``PUNCTUATION``
            a one-element list (or ``[]`` when ``self.include_punc`` is
            false); otherwise the result of ``pattern_tag`` called with
            ``self.encoding`` and ``self.tagset``, with pairs whose tag is
            matched by ``PUNCTUATION_REGEX`` removed when
            ``self.include_punc`` is false.
        """
        text = sentence.strip()

        #: Blank input produces no tags (Issue #3)
        if text == "":
            return []

        #: Input that is found in PUNCTUATION bypasses the tagger (Issue #4)
        if text in PUNCTUATION:
            if self.include_punc:
                # '.', '?' and '!' share the "." tag; other marks tag as
                # themselves.
                return [(text, "." if text in tuple('.?!') else text)]
            return []

        if tokenize:
            sentence = " ".join(self.tokenizer.tokenize(sentence))

        # The tagger always receives text that needs no further tokenizing:
        # either it was joined above or the caller supplied it that way.
        tagged = pattern_tag(
            sentence,
            tokenize=False,
            encoding=self.encoding,
            tagset=self.tagset,
        )
        if not self.include_punc:
            tagged = [(word, t) for word, t in tagged
                      if not PUNCTUATION_REGEX.match(unicode(t))]
        return tagged