# 예제 #1 (Example 1)
# 0
class RPG:
    """Builds question-answer datasets from line-oriented reddit JSON dumps.

    Each pipeline stage reads a file of one-JSON-object-per-line comments,
    writes a filtered copy into ``storage_path``, and returns the new path.
    An existing output file is reused unless ``update=True`` is passed.
    """

    def __init__(self, storage_path, corenlp_url):
        # Directory that receives every derived file.
        self.storage_path = os.path.abspath(storage_path)
        # CoreNLP constituency parser used to detect question sentences.
        self.parser = CoreNLPParser(url=corenlp_url)

    def create_question_json(self, filepath: str, max_question_word_count: int = 30, update: bool = False) -> str:
        """Keep only (question, answer) line pairs whose first line parses as a question.

        The input alternates question and answer lines; a pair is kept when
        any constituency subtree of the question carries an SBARQ/SQ
        (question-clause) label. Returns the path of the filtered file.
        """
        newfilepath = os.path.join(
            self.storage_path, 'questions_' + os.path.basename(filepath))
        if not update and os.path.exists(newfilepath):
            return newfilepath

        question_symbols = {'SBARQ', 'SQ'}

        # Context managers fix the original leak: neither handle was closed.
        with open(newfilepath, 'w') as wfile, \
                open(os.path.abspath(filepath), 'r') as rfile:
            for line in rfile:
                # Lines come in (question, answer) pairs; a trailing unpaired
                # line is ignored instead of raising StopIteration.
                answer = next(rfile, None)
                if answer is None:
                    break
                json_line = json.loads(line)
                body_text = str(json_line['body'])  # was 'property' (shadowed builtin)
                if len(body_text.split()) > max_question_word_count:
                    continue
                try:
                    # A found-flag replaces the original
                    # raise Exception('Prevent duplicate write') break hack,
                    # which also silently swallowed genuine parser errors.
                    found = False
                    for parse_tree in self.parser.parse_text(body_text):
                        for subtree in parse_tree.subtrees():
                            if subtree.label() in question_symbols:
                                found = True
                                break
                        if found:
                            break
                    if found:
                        wfile.write(line)
                        wfile.write(answer)
                except Exception as e:
                    # Best-effort: a failed parse skips the pair, but is now
                    # reported (consistent with the other stages).
                    print(e)
        return newfilepath

    def perform_ner(self, filepath: str, entity_type: str = 'number', update: bool = False) -> str:
        """Keep (question, answer) pairs whose answer contains an entity of ``entity_type``.

        Uses the recognizers-text number model on the answer body.
        Returns the path of the filtered file.
        """
        newfilepath = os.path.join(
            self.storage_path, entity_type + '_' + os.path.basename(filepath))
        if not update and os.path.exists(newfilepath):
            return newfilepath

        recognizer = NumberRecognizer(Culture.English)
        model = recognizer.get_number_model()

        with open(newfilepath, 'w') as wfile, \
                open(os.path.abspath(filepath), 'r') as rfile:
            for line in rfile:
                # Pairwise iteration; ignore a trailing unpaired line.
                answer = next(rfile, None)
                if answer is None:
                    break
                text = str(json.loads(answer)['body'])
                try:
                    for recognized in model.parse(text):
                        if recognized.type_name == entity_type:
                            wfile.write(line)
                            wfile.write(answer)
                            break
                except Exception as e:
                    print(e)
        return newfilepath

    def create_subreddit_json(self, filepath: str, subreddit: str, update: bool = False) -> str:
        """Copy only the comments belonging to ``subreddit`` (case-insensitive).

        Returns the path of the per-subreddit file.
        """
        subreddit = subreddit.lower()

        newfilepath = os.path.join(
            self.storage_path, subreddit + '_' + os.path.basename(filepath))
        if not update and os.path.exists(newfilepath):
            return newfilepath

        with open(newfilepath, 'w') as wfile, \
                open(os.path.abspath(filepath), 'r') as rfile:
            for line in rfile:
                try:
                    json_line = json.loads(line)
                    if json_line['subreddit'].lower() == subreddit:
                        wfile.write(line)
                except Exception as e:
                    print(e)
        return newfilepath

    def find_comment_pairs(self, filepath: str, min_score: int = 0, update: bool = False) -> str:
        """Write (parent, child) line pairs for comments scoring above ``min_score``.

        For each qualifying comment, its parent is the best-scoring comment
        whose ``id`` matches the child's ``parent_id`` with the 3-char link
        prefix (e.g. ``t1_``) stripped; the parent must score above 0.
        Returns the path of the pairs file.
        """
        newfilepath = os.path.join(
            self.storage_path, 'pairs_' + os.path.basename(filepath))
        if not update and os.path.exists(newfilepath):
            return newfilepath

        filepath = os.path.abspath(filepath)

        # One indexing pass replaces the original O(n^2) full rescan of the
        # file for every qualifying comment (which also leaked one file
        # handle per rescan). Strict '>' keeps first-seen on ties, matching
        # the original comparison.
        best_by_id = {}
        with open(filepath, 'r') as rfile:
            for line in rfile:
                try:
                    comment = json.loads(line)
                except Exception as e:
                    print(e)
                    continue
                current = best_by_id.get(comment['id'])
                if current is None or comment['score'] > current['score']:
                    best_by_id[comment['id']] = comment

        with open(newfilepath, 'w') as wfile, open(filepath, 'r') as rfile:
            for line in rfile:
                try:
                    comment = json.loads(line)
                except Exception:
                    # Already reported during the indexing pass.
                    continue
                if comment['score'] <= min_score:
                    continue
                parent = best_by_id.get(comment['parent_id'][3:])
                # Original only emitted parents with a strictly positive score.
                if parent is not None and parent['score'] > 0:
                    wfile.write(json.dumps(parent) + '\n')
                    wfile.write(line)
        return newfilepath

    def perform_all(self, filepath: str, subreddits: list, update: bool):
        """Run the full pipeline (subreddit → pairs → questions → NER) per subreddit,
        printing the wall-clock time of each stage."""
        for subreddit in subreddits:
            sub_time = time.time()
            sub = self.create_subreddit_json(filepath, subreddit, update=update)
            print('Subreddit comments file created in {:.2f} seconds'.format(time.time() - sub_time))

            pairs_time = time.time()
            pairs = self.find_comment_pairs(sub, update=update)
            print('Comment pairs file created in {:.2f} seconds'.format(time.time() - pairs_time))

            questions_time = time.time()
            questions = self.create_question_json(pairs, update=update)
            print('Question-answer file created in {:.2f} seconds'.format(time.time() - questions_time))

            ner_time = time.time()
            # Bug fix: 'update' was previously dropped here, so a stale NER
            # file was always reused even when update=True.
            ner = self.perform_ner(questions, update=update)
            print('Entity file created in {:.2f} seconds'.format(time.time() - ner_time))
# 예제 #2 (Example 2)
# 0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 27 13:05:59 2018

@author: raja
"""

import pandas as pd
from nltk.parse.corenlp import CoreNLPParser

# Single tab-separated column of raw sentences, no header row.
df = pd.read_csv('text_alone.csv', header=None, delimiter="\t")

# Constituency parser backed by a CoreNLP server running locally.
parser = CoreNLPParser(url='http://localhost:9010')

remaining = len(df[0])
flattened_trees = []
for text in df[0]:
    # parse_text yields one tree per sentence; only the first is kept.
    tree = next(parser.parse_text(text))
    # Collapse the pretty-printed multi-line tree onto a single line.
    flattened_trees.append(' '.join(str(tree).split()))
    remaining -= 1
    if remaining % 10 == 0:  # coarse progress indicator, counts down
        print(remaining)

pd.DataFrame(flattened_trees).to_csv('pos_tree1.csv', index=False, header=False)
# 예제 #3 (Example 3)
# 0
import os
import nltk
from nltk.parse.corenlp import CoreNLPServer
from nltk.parse.corenlp import CoreNLPParser
from nltk.parse.corenlp import CoreNLPDependencyParser

# Local CoreNLP 3.9.2 distribution; the server is launched from these jars.
STANFORD = "stanford-corenlp-full-2018-10-05"

jars = (
    os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
    os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
)

# Navigation instruction to parse.
text = "turn right and go up the stairs and stand at the top."
print(text)

# CoreNLPServer is a context manager: the Java server process is started on
# entry and shut down on exit, even if a parse below raises.
with CoreNLPServer(*jars):

    # Constituency parse: parse_text yields one tree per sentence.
    parser = CoreNLPParser()
    for tree in parser.parse_text(text):
        print(tree)

    # Dependency parse of the same text.
    parser = CoreNLPDependencyParser()
    for graph in parser.raw_parse(text):
        print(graph)