# Deferred annotation evaluation lets the Token/Sentence sketches defined
# further down appear in signatures above their definitions.
from __future__ import annotations

import glob
import os
import re
from typing import List

# The original snippet omits its imports; the package choices below are
# assumptions that fit the era (stanfordnlp's CoreNLP client and the
# pytorch_pretrained_bert tokenizers). `config` is a project module whose
# `bert_model` attribute names the BERT checkpoint.
from pytorch_pretrained_bert.tokenization import BasicTokenizer, BertTokenizer
from stanfordnlp.server import CoreNLPClient

import config


# Initial Tokenizer: trusts CoreNLP's character offsets for sentence spans,
# then aligns BERT basic tokens to those spans.
class Tokenizer:
    def __init__(self) -> None:
        os.environ['CORENLP_HOME'] = '{}/stanford-corenlp-full-2018-10-05'.format(
            os.environ['HOME'])
        self.client = CoreNLPClient(annotators=['ssplit'])
        self.client.ensure_alive()
        # Uncased BERT checkpoints expect lower-cased input.
        self.do_lower_case = '-cased' not in config.bert_model
        self.basic_tokenizer: BasicTokenizer = BertTokenizer.from_pretrained(
            config.bert_model, do_lower_case=self.do_lower_case).basic_tokenizer

    def tokenize(self, doc: str) -> List[List[Token]]:
        corenlp_annotation = self.client.annotate(doc)
        sentences = []
        for sentence in corenlp_annotation.sentence:
            text = doc[sentence.characterOffsetBegin:sentence.characterOffsetEnd]
            if self.do_lower_case:
                text = text.lower()
            offset = sentence.characterOffsetBegin
            bert_tokens = self.basic_tokenizer.tokenize(text)
            begin = 0
            tokens = []
            for bert_token in bert_tokens:
                word = bert_token
                # Re-find each BERT token in the sentence text to recover its
                # document-level character span; index() raises if the basic
                # tokenizer altered characters (e.g. stripped accents).
                begin = text.index(word, begin)
                end = begin + len(word)
                tokens.append(Token(word, begin + offset, end + offset))
                begin = end
            if len(tokens) > 0:
                sentences.append(tokens)
        return sentences
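# Both Tokenizer variants in this file construct `Token` objects, but the
# class itself is defined elsewhere in the project. The definition below is a
# minimal sketch of what the call sites assume (a word plus its character span
# in the original document), not the project's actual class.
class Token:
    def __init__(self, word: str, begin: int, end: int) -> None:
        self.word = word    # token text (lower-cased when the model is uncased)
        self.begin = begin  # offset of the first character in the document
        self.end = end      # offset one past the last character

    def __repr__(self) -> str:
        return 'Token({!r}, {}, {})'.format(self.word, self.begin, self.end)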
# Revised Tokenizer (supersedes the version above): recomputes sentence spans
# from each token's originalText, repairs bad sentence splits, and only then
# aligns BERT basic tokens.
class Tokenizer:
    def __init__(self) -> None:
        os.environ['CORENLP_HOME'] = '{}/stanford-corenlp-full-2018-10-05'.format(
            os.environ['HOME'])
        self.client = CoreNLPClient()
        self.client.ensure_alive()
        # Uncased BERT checkpoints expect lower-cased input.
        self.do_lower_case = '-cased' not in config.bert_model
        self.basic_tokenizer: BasicTokenizer = BertTokenizer.from_pretrained(
            config.bert_model, do_lower_case=self.do_lower_case).basic_tokenizer

    def __del__(self) -> None:
        # The client leaves per-server property files behind; clean them up.
        for p in glob.glob('corenlp_server-*.props'):
            if os.path.isfile(p):
                os.remove(p)

    def tokenize(self, doc: str) -> List[Sentence]:
        # Disable PTB escaping and keep the annotation invertible so that each
        # token's originalText can be matched verbatim in `doc`.
        splitter_annotation = self.client.annotate(
            doc, annotators=['ssplit'],
            properties={'tokenize.options': 'ptb3Escaping=false,invertible=true'})
        # Recover each sentence's character span from its first and last token.
        end = 0
        sentences = []
        for sentence in splitter_annotation.sentence:
            begin = doc.index(sentence.token[0].originalText, end)
            for token in sentence.token:
                end = doc.index(token.originalText, end) + len(token.originalText)
            text = doc[begin:end]
            sentences.append(Sentence(text, begin, end))
        sentences = self.fix_split(sentences)
        # Align BERT basic tokens with character offsets inside each sentence.
        for sentence in sentences:
            text = sentence.text
            if self.do_lower_case:
                text = text.lower()
            bert_tokens = self.basic_tokenizer.tokenize(text)
            end = 0
            tokens = []
            for bert_token in bert_tokens:
                word = bert_token
                begin = text.index(word, end)
                end = begin + len(word)
                tokens.append(
                    Token(word, sentence.begin + begin, sentence.begin + end))
            assert len(tokens) > 0
            sentence.tokens = tokens
        return sentences

    @staticmethod
    def fix_split(sentences: List[Sentence]) -> List[Sentence]:
        # CoreNLP sometimes splits in the wrong place: break sentences at blank
        # lines, and merge consecutive sentences that touch with no gap.
        result = []
        i = 0
        while i < len(sentences):
            sentence = sentences[i]
            while True:
                next_sentence = sentences[
                    i + 1] if i < len(sentences) - 1 else None
                if '\n\n' in sentence.text:
                    # Split the sentence at the paragraph break.
                    index = sentence.text.index('\n\n')
                    new_sentence = Sentence(sentence.text[:index],
                                            sentence.begin,
                                            sentence.begin + index)
                    result.append(new_sentence)
                    # Skip the whitespace run that follows the break.
                    index += re.search(r'[\n\t ]+', sentence.text[index:]).end()
                    sentence.text = sentence.text[index:]
                    sentence.begin += index
                elif next_sentence is not None and next_sentence.begin == sentence.end:
                    # Merge sentences that were split without any gap between them.
                    sentence.text += next_sentence.text
                    sentence.end = next_sentence.end
                    i += 1
                else:
                    result.append(sentence)
                    break
            i += 1
        return result
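# `Sentence` is likewise defined elsewhere in the project; the revised
# tokenize() requires a mutable record with the sentence text, its character
# span, and a tokens list filled in after fix_split(). A minimal sketch of
# that assumption:
class Sentence:
    def __init__(self, text: str, begin: int, end: int) -> None:
        self.text = text    # sentence text sliced from the document
        self.begin = begin  # offset of the sentence start in the document
        self.end = end      # offset one past the sentence end
        self.tokens: List[Token] = []


# Hypothetical usage, assuming a stanford-corenlp-full-2018-10-05 install
# under $HOME and a config.bert_model such as 'bert-base-cased':
if __name__ == '__main__':
    tokenizer = Tokenizer()
    doc = 'Alice met Bob in Paris.\n\nThey talked for an hour.'
    for sentence in tokenizer.tokenize(doc):
        print(sentence.begin, sentence.end, repr(sentence.text))
        for token in sentence.tokens:
            print('  ', token)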