class CorenlpSubprocWordSplitter(CorenlpRemoteWordSplitter):
    """
    A ``WordSplitter`` that uses CoreNLP's tokenizer.
    It starts ``corenlp-server`` as a sub-process, and calls its web API.
    """

    def __init__(
            self,
            path_to_jar: str = None,
            path_to_models_jar: str = None,
            verbose: bool = False,
            java_options: str = None,
            corenlp_options: str = None,
            port: int = None,
            encoding: str = 'utf8',
    ):
        """
        Parameters
        ----------
        * For parameters from ``path_to_jar`` to ``port``, see
          https://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.
        * For parameter ``encoding``, see
          https://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.CoreNLPParser
        """
        self._server = CoreNLPServer(path_to_jar, path_to_models_jar, verbose,
                                     java_options, corenlp_options, port)
        self._server.start()
        super().__init__(self._server.url, encoding)

    def __del__(self):
        self._server.stop()
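# A minimal usage sketch for the splitter above, assuming the parent class
# exposes the usual ``split_words`` method. The jar paths and the sample
# sentence are illustrative assumptions, not taken from the original code.
splitter = CorenlpSubprocWordSplitter(
    path_to_jar='stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2.jar',
    path_to_models_jar='stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2-models.jar',
)
tokens = splitter.split_words('The quick brown fox jumps over the lazy dog.')
# The server subprocess is stopped when the splitter is garbage-collected
# (see ``__del__`` above).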
def startServer(self):
    java_path = "C:\\Program Files\\Java\\jdk1.8.0_201\\bin\\java.exe"
    os.environ['JAVAHOME'] = java_path
    home = os.path.expanduser("~")
    download_path = os.path.join(home, "Downloads")
    print(download_path)

    # The server needs to know the location of the following files:
    #   - stanford-corenlp-X.X.X.jar
    #   - stanford-corenlp-X.X.X-models.jar
    STANFORD = os.path.join(download_path, "stanford-corenlp-full-2018-10-05")

    # Create the server. CoreNLPServer expects the main jar first and the
    # models jar second; it takes no third path argument (a third positional
    # would be bound to ``verbose``). To put the English models jar on the
    # classpath as well, override ``server._classpath`` after construction.
    server = CoreNLPServer(
        os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
        os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
    )

    # Start the server in the background
    server.start()
    print("Server Started")
    self.stanfordCoreNLP = StanfordCoreNLP('http://localhost:9000')
    return self.stanfordCoreNLP
def setup(manageServerInternally=False):
    global server
    config['isManagingServer'] = manageServerInternally
    if manageServerInternally:
        print("Starting CoreNLP server...")
        server = CoreNLPServer(
            os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
        )
        server.start()
    else:
        try:
            print("Checking connection to CoreNLP server...")
            requests.get(f'{config["coreNLPServerURL"]}/live')
        except requests.exceptions.RequestException:
            print("Error connecting to CoreNLP instance! "
                  "Make sure the server is running in the background.")
            print("The relevant command can be found in the README.")
            exit(1)
    setupQANet()
def start_core_nlp_server(self):
    home = os.path.expanduser("~")
    if os.name == 'nt':
        java_path = "C:\\Program Files\\Java\\jdk1.8.0_201\\bin\\java.exe"
        download_path = os.path.join(home, "Downloads")
        STANFORD_HOME = os.path.join(download_path, "stanford-corenlp-full-2018-10-05")
    else:  # 'posix'
        java_path = "/usr/lib/jvm/java-8-oracle/"
        download_path = os.path.join(home, "ttp_sense_python")
        STANFORD_HOME = os.path.join(download_path, "lib")
    print('Stanford_Directory: ', STANFORD_HOME)
    os.environ['JAVAHOME'] = java_path

    # The server needs to know the location of the following files:
    #   - stanford-corenlp-X.X.X.jar
    #   - stanford-corenlp-X.X.X-models.jar
    # Create the server (main jar first, models jar second; the extra
    # stanford-english-corenlp-2018-10-05-models.jar cannot be passed as a
    # third positional argument; override ``server._classpath`` instead)
    server = CoreNLPServer(
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2.jar"),
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2-models.jar"),
    )

    # Start the server in the background
    server.start()
    print("Server Started")
class CoreNLP:
    def __init__(self, args):
        self.context = dict()
        self.server = None
        self.set_system_env(*args)

    def set_system_env(self, *args):
        idx = 1
        while idx < len(args):
            if args[idx] == '--stanford':
                idx += 1
                stanford_path = args[idx]
                self.context['path_to_jar'] = os.path.join(
                    stanford_path, 'stanford-corenlp-3.9.2.jar')
                self.context['path_to_models_jar'] = os.path.join(
                    stanford_path, 'stanford-corenlp-3.9.2-models.jar')
                print('corenlp jar:', self.context['path_to_jar'])
                print('corenlp models jar:', self.context['path_to_models_jar'])
            elif args[idx] == '--java':
                idx += 1
                java_path = args[idx]
                os.environ['JAVAHOME'] = java_path
                print('java path:', java_path)
            idx += 1

    def start_server(self):
        self.server = CoreNLPServer(**self.context)
        self.server.start()

    def stop_server(self):
        self.server.stop()

    def parse_tree(self, s):
        parser = CoreNLPParser()
        parse = next(parser.raw_parse(s))
        # parse.draw()
        return parse

    def dependency_parse_tree(self, s):
        parser = CoreNLPDependencyParser()
        parse = next(parser.raw_parse(s))
        return parse
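# A hedged sketch of driving the class above with argv-style arguments
# (set_system_env skips args[0], mirroring sys.argv). The jar directory and
# Java path below are assumptions; point them at your own installation.
nlp = CoreNLP(['prog', '--stanford', '/opt/stanford-corenlp-full-2018-10-05',
               '--java', '/usr/lib/jvm/java-8-oracle/'])
nlp.start_server()  # launches CoreNLP on the default port 9000
tree = nlp.parse_tree('The quick brown fox jumps over the lazy dog.')
graph = nlp.dependency_parse_tree('The quick brown fox jumps over the lazy dog.')
nlp.stop_server()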
def start_core_nlp_server(self):
    os.environ['JAVAHOME'] = self.JAVA_HOME
    HOMEDIR = os.path.expanduser("~")
    DOWNLOAD_HOME = os.path.join(HOMEDIR, self.DOWNLOAD_HOME)
    STANFORD_HOME = os.path.join(DOWNLOAD_HOME, self.STANFORD_HOME)
    print('Stanford_Directory: ', STANFORD_HOME)

    # The server needs to know the location of the following files:
    #   - stanford-corenlp-X.X.X.jar
    #   - stanford-corenlp-X.X.X-models.jar
    # Create the server (main jar first, models jar second; the extra
    # stanford-english-corenlp-2018-10-05-models.jar cannot be passed as a
    # third positional argument; override ``server._classpath`` instead)
    server = CoreNLPServer(
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2.jar"),
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2-models.jar"),
    )

    # Start the server in the background
    server.start()
    print("Server Started")
def start_CoreNLPServer(self):
    url = 'http://localhost:9000'
    status_code = 0
    try:
        status_code = urllib.request.urlopen(url).getcode()
    except urllib.error.URLError:
        pass
    if status_code != 200:
        print('CoreNLPServer is starting {}'.format(url))
        try:
            os.environ['CLASSPATH'] = self.model_path
            server = CoreNLPServer(port=9000)
            server.start()
            status_code = urllib.request.urlopen(url).getcode()
            print('server started {}'.format(status_code))
        except Exception as e:
            print(url, e)
            raise
def dependency_parse(raw_data):
    from nltk.parse.corenlp import CoreNLPServer

    # The server needs to know the location of the following files:
    #   - stanford-corenlp-X.X.X.jar
    #   - stanford-corenlp-X.X.X-models.jar
    STANFORD = os.path.join("..", "stanford-corenlp-full-2020-04-20")

    # Create the server
    server = CoreNLPServer(
        os.path.join(STANFORD, "stanford-corenlp-4.0.0.jar"),
        os.path.join(STANFORD, "stanford-corenlp-4.0.0-models.jar"),
    )

    # Start the server in the background
    server.start()

    from nltk.parse import CoreNLPParser
    parser = CoreNLPParser()

    new_data = []
    for example in raw_data:
        sentence, features_seq = example[0], example[-1]
        parse = next(parser.raw_parse(sentence))
        # get a few "important" neighboring words
        # (the original loop body is elided here; collecting the parse is an
        # assumption so the function returns something usable)
        new_data.append((sentence, parse, features_seq))
    server.stop()
    return new_data
def setup(self):
    url = settings.CORENLP_URL
    if url is None:
        server = CoreNLPServer(
            settings.CORENLP_PATH,
            settings.CORENLP_MODEL_PATH,
        )
        server.start()
        self.server = server
        url = server.url
    else:
        print("[TreeParser] Using existing CoreNLP Server...")
    self.parser = CoreNLPParser(url=url)
    # maybe separated with another class...
    self.dependency_parser = CoreNLPDependencyParser(url=url)
    return self.parser
def get_standford(corenlp_mode=CORENLP_MODE):
    # load the parser
    if not STANDFORD:
        if JAVA_PATH:
            os.environ['JAVAHOME'] = JAVA_PATH
        if corenlp_mode:
            STANDFORD["server"] = CoreNLPServer(
                path_to_jar=PATH_TO_JAR,
                path_to_models_jar=PATH_TO_MODELS_JAR,
                java_options=JAVA_OPTIONS,
                verbose=True)
            print("starting server")
            STANDFORD["server"].start()
            print("server on")
            STANDFORD["parser"] = CoreNLPParser(url=STANDFORD["server"].url)
        else:
            STANDFORD["parser"] = StanfordParser(
                path_to_jar=PATH_TO_JAR,
                path_to_models_jar=PATH_TO_MODELS_JAR,
                java_options=JAVA_OPTIONS)
    return STANDFORD["parser"]
def server():
    print('Starting CoreNLP server...')
    serv = CoreNLPServer(path_to_jar=config.CORENLP_JAR,
                         path_to_models_jar=config.CORENLP_MODELS_JAR)
    try:
        serv.start()
        print('Server started.')
        while True:
            pass
    except KeyboardInterrupt:
        pass
    except Exception as e:
        print(e)
    finally:
        print('Stopping server...')
        serv.stop()
def __init__(self, sentence):
    with CoreNLPServer(port=9000) as server:
        en_parser = CoreNLPParser()
        # sg = StanfordTokenizer(path_to_jar='../stanford-parser-full-2018-02-27/stanford-parser.jar')
        self.trans = googletrans.Translator()
        self.sentence = sentence
        result1 = self.trans.translate(sentence).text
        print(result1)
        # en_sentence = result1.split(".")
        # print(en_sentence)
        # tree = list(en_parser.raw_parse(result1))
        sent_iter = en_parser.raw_parse_sents([result1])  # renamed from ``iter`` to avoid shadowing the builtin
        tree = []
        while True:
            try:
                sub_tree = list(next(sent_iter))
                tree.append(sub_tree)
            except StopIteration:
                break
        print(len(tree))
        self.tree = tree[0][0]
        self.rel = []
# import re
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer  # needed for the stemmer below
from clean_data import process_sentence

# 2017-12-3: using a different parser to parse sentences
'''
from nltk.parse.stanford import StanfordDependencyParser
path_to_jar = '/Users/collin/stanford/stanford-parser-full-2017-06-09/stanford-parser.jar'
path_to_models_jar = '/Users/collin/stanford/stanford-parser-full-2017-06-09/stanford-parser-3.8.0-models.jar'
dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
'''
from nltk.parse.corenlp import CoreNLPServer, CoreNLPDependencyParser

path_to_jar = '/Users/collin/stanford/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
path_to_models_jar = '/Users/collin/stanford/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'
server = CoreNLPServer(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
server.start()
dependency_parser = CoreNLPDependencyParser()

stemmer = SnowballStemmer('english')

def stem(w):
    return stemmer.stem(w)

# dependency relations of interest
DR_one = ['nsubj', 'dobj', 'xsubj', 'csubj', 'nmod', 'iobj', 'xcomp']
DR_two = ['amod']
# DR_two = ['nsubj','dobj','xsubj','csubj','nsubjpass','nmod','iobj']
DR_three = ['conj']
DR = DR_one + DR_three
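# A short illustration of how the globals above are typically combined:
# parse a sentence and keep only the triples whose relation is listed in DR.
# The sample sentence is an assumption, not from the original module.
parse = next(dependency_parser.raw_parse('The battery life of this phone is great.'))
for governor, dep, dependent in parse.triples():
    if dep in DR:
        print(governor, dep, dependent)  # e.g. (('great', 'JJ'), 'nsubj', ('life', 'NN'))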
    return total_words / num_sentences


def getPerplexity(sentences):
    raise NotImplementedError


if __name__ == '__main__':
    working_directory = os.getcwd()
    core_nlp_directory = os.path.join(working_directory, 'stanford-corenlp-4.2.0')
    server = CoreNLPServer(os.path.join(core_nlp_directory, "stanford-corenlp-4.2.0.jar"),
                           os.path.join(core_nlp_directory, "stanford-corenlp-4.2.0-models.jar"),
                           verbose=True)
    input_sents = TOUCHDOWN.load_TOUCHDOWN()
    print('Vocab Size:')
    print(getVocabSize(input_sents))
    print('Mean Frazier Score:')
    print(getMeanFrazier(server, input_sents))
    print('Mean Yngve Score:')
    print(getMeanYngve(server, input_sents))
    print('PoS Distribution:')
    print(getPoSDistribution(input_sents))
    print('Average Sent Len:')
    print(getAvgSentLen(input_sents))
from nltk.parse.corenlp import CoreNLPServer
import os

# The server needs to know the location of the following files:
#   - stanford-corenlp-X.X.X.jar
#   - stanford-corenlp-X.X.X-models.jar
main_dir = os.path.dirname(os.path.realpath(__file__))
STANFORD = os.path.join(main_dir, "models", "stanford-corenlp-full-2018-10-05")

# Create the server
server = CoreNLPServer(
    os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
    os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar")
)

# Start the server in the background
server.start()
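# Once the server is up, parsers talk to it over HTTP. A minimal follow-up
# sketch (the sentence is an assumption; ``server.url`` and ``server.stop()``
# are part of nltk's CoreNLPServer API):
from nltk.parse.corenlp import CoreNLPParser

parser = CoreNLPParser(url=server.url)
tree = next(parser.raw_parse('The quick brown fox jumps over the lazy dog.'))
print(tree)
server.stop()  # shut the background Java process down when done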
import os

from nltk.parse.corenlp import CoreNLPServer

# The server needs to know the location of the following files:
#   - stanford-corenlp-X.X.X.jar
#   - stanford-corenlp-X.X.X-models.jar
from cpath import data_path

# Create the server
server = CoreNLPServer(
    os.path.join(data_path, "stanford-corenlp-4.0.0.jar"),
    os.path.join(data_path, "stanford-corenlp-4.0.0-models.jar"),
)

# Start the server in the background
server.start()
    return float(x) / float(y)


class BulletPointLangVars(PunktLanguageVars):
    sent_end_chars = ('.', '?', '!', '•', '...')


BROWN_BIGRAMS = FreqDist(bigrams(brown.words(categories=['reviews'])))
TOKENIZER = RegexpTokenizer(r'\w+')
SENT_TOKENIZER = PunktSentenceTokenizer(lang_vars=BulletPointLangVars())
TREGEX = "../../tregex"
TEMP = "./"
STANFORD = "../../stanford-corenlp-4.0.0"
SERVER = CoreNLPServer(os.path.join(STANFORD, "stanford-corenlp-4.0.0.jar"),
                       os.path.join(STANFORD, "stanford-corenlp-4.0.0-models.jar"),
                       port=9000,
                       java_options='-Xmx4g -Xms1g')
PARSER = CoreNLPParser()


class SingleTextProcessor(object):
    """Class that stores and processes a single text"""

    def __init__(self, text_string, toeic_score, text_id, mode):
        self.raw_text = text_string.replace('\n', ' ')
        self.toeic_score = toeic_score
        self.text_id = text_id
        self.mode = mode
        self.sentences = sent_tokenize(self.raw_text)
import os

from nltk.parse.corenlp import CoreNLPServer

# os.path.join discards its first argument when the second is absolute, so
# join the installation directory with the bare jar filenames instead.
stanford = "/home/mek/stanford-corenlp"
server = CoreNLPServer(
    os.path.join(stanford, "stanford-corenlp-3.9.2.jar"),
    os.path.join(stanford, "stanford-corenlp-3.9.2-models.jar"))
server.start()
from extraction.ProcessElementsBuilder import ProcessElementsBuilder

app = Flask(__name__)

ALLOWED_EXTENSIONS = {'txt'}
CORENLP_PATH = path.join(path.dirname(path.dirname(path.abspath(__file__))),
                         "resources/corenlp")

# Starting the CoreNLP Server
try:
    server = CoreNLPServer(
        corenlp_options=[
            "-preload", "tokenize,ssplit,pos,parse,depparse",
            "-timeout", "60000",
            "-serverProperties",
            path.join(CORENLP_PATH, "StanfordCoreNLP-serverProps.properties")
        ],
        path_to_jar=path.join(CORENLP_PATH, "stanford-corenlp-3.9.2.jar"),
        path_to_models_jar=path.join(CORENLP_PATH,
                                     "stanford-corenlp-3.9.2-models.jar"),
        verbose=True,
        java_options="-Xmx4g",
        port=9000)
    server._classpath = path.join(CORENLP_PATH, "*")
    server.start()
    atexit.register(server.stop)
except error:
    print("Something is already running on port 9000.")


def allowed_file(filename):
    return '.' in filename and \
from nltk.parse.corenlp import CoreNLPServer

server = CoreNLPServer(
    '../pre_trained_models/stanford-corenlp-4.0.0.jar',
    '../pre_trained_models/stanford-corenlp-4.0.0-models.jar'
)
server.start()
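# The snippet above never stops the server, which leaves the Java subprocess
# running after the script exits. A small, hedged addition borrowing the
# atexit pattern used elsewhere in this collection:
import atexit

atexit.register(server.stop)  # ensure the background JVM is stopped on exit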
import os
import zipfile
import urllib.request

from nltk import FreqDist
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from nltk.util import bigrams
from nltk.parse.corenlp import CoreNLPServer

if "stanford-corenlp-4.0.0" not in os.listdir():
    urllib.request.urlretrieve(
        'http://nlp.stanford.edu/software/stanford-corenlp-latest.zip',
        'stanford-corenlp-latest.zip')
    zipfile.ZipFile('stanford-corenlp-latest.zip', 'r').extractall('./')

STANFORD = "./stanford-corenlp-4.0.0"
server = CoreNLPServer(
    "./stanford-corenlp-4.0.0/stanford-corenlp-4.0.0.jar",
    "./stanford-corenlp-4.0.0/stanford-corenlp-4.0.0-models.jar",
)

initialized = True  # if this is false, the initialize() function will be running
up_to_date = True  # check if this is up-to-date
ps = PorterStemmer()  # stemmer
labels = [
    "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
]
list_normal_features = [
    "num_word", "num_unique_word", "rate_unique", "num_token_no_stop",
    "num_spelling_error", "num_all_cap", "rate_all_cap", "length_cmt",
    "num_cap_letter", "rate_cap_letter", "num_explan_mark", "rate_explan_mark",
    "num_quest_mark", "rate_quest_mark", "num_punc_mark", "num_mark_sym",
    "num_smile", "rate_space", "rate_lower", "bad_words_type_1",
from nltk.parse.corenlp import CoreNLPServer
import os

# The server needs to know the location of the following files:
#   - stanford-corenlp-X.X.X.jar
#   - stanford-corenlp-X.X.X-models.jar
STANFORD = "../../stanford-corenlp-4.0.0"

# Create the server
server = CoreNLPServer(os.path.join(STANFORD, "stanford-corenlp-4.0.0.jar"),
                       os.path.join(STANFORD, "stanford-corenlp-4.0.0-models.jar"),
                       port=9000)

# Start the server in the background
print(server.url)
server.start()
class Summarizer:
    """
    Summarizer class implementing opinion-feature extraction. Uses the
    Stanford CoreNLP dependency parser.

    Attributes:
        server (CoreNLPServer): CoreNLP server for accessing Stanford CoreNLP services.
        parser (CoreNLPDependencyParser): CoreNLP dependency parser.
    """

    def __init__(self, jar_path, models_jar_path):
        """
        The constructor for the Summarizer class.

        Parameters:
            jar_path (str): Filepath to the Stanford CoreNLP .jar file.
            models_jar_path (str): Filepath to the Stanford CoreNLP models .jar file.
        """
        logging.info('Starting CoreNLP server...')
        self.server = CoreNLPServer(path_to_jar=jar_path,
                                    path_to_models_jar=models_jar_path)
        try:
            self.server.start()
            logging.info('CoreNLP server started.')
        # CoreNLPServerError is raised when a server is already running
        except CoreNLPServerError:
            logging.warning('CoreNLP server is already running.')
        self.parser = CoreNLPDependencyParser()

    def summarize(self, text):
        """
        Summarizes a review by extracting opinion-feature pairs from it.

        Parameters:
            text (str): Review text.

        Returns:
            Summary: List of opinion-feature pairs extracted from the review text.
        """
        try:
            parse = next(self.parser.raw_parse(text))
        # An HTTPError raised by the CoreNLP server is related to
        # unrecognized characters in the review text
        except HTTPError:
            logging.warning(f'Review skipped: {text}')
            return []

        # Search the dependency parsing result for "nsubj" or "amod" relations
        summary = list()
        for governor, dep, dependent in parse.triples():
            if dep == 'nsubj':
                # Check that the nominal subject is a noun modified by an adjective
                if governor[1] == 'JJ' and dependent[1] in {'NN', 'NNS'}:
                    summary.append((governor[0].lower(), dependent[0].lower()))
            elif dep == 'amod':
                # Check that the adjective is linked to a noun
                if dependent[1] == 'JJ' and governor[1] in {'NN', 'NNS'}:
                    summary.append((dependent[0].lower(), governor[0].lower()))
        return summary

    def stop(self):
        """Stops the CoreNLP server of the summarizer object."""
        self.server.stop()
        logging.info('CoreNLP server stopped.')
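# A hedged usage sketch for the Summarizer above; the jar paths and the
# review text are illustrative assumptions, not from the original code.
summarizer = Summarizer(
    jar_path='stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2.jar',
    models_jar_path='stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2-models.jar')
pairs = summarizer.summarize('The battery life is great but the screen is dim.')
print(pairs)  # opinion-feature tuples, e.g. ('great', 'life')
summarizer.stop()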
class CoreNLPSentenceAnalyzer():
    """
    A sentence analyzer based on Stanford CoreNLP.

    References:
        The CoreNLP Syntax Parser
            https://bbengfort.github.io/snippets/2018/06/22/corenlp-nltk-parses.html
        Penn Treebank II Tags
            https://gist.github.com/nlothian/9240750
    """

    def __init__(self):
        self.lab_set = set()

    def init_server(self):
        STANFORD = os.path.join("stanford-corenlp-full-2018-10-05")
        self.server = CoreNLPServer(
            os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"))
        self.server.start()
        self.parser = CoreNLPParser()

    def stop_server(self):
        self.server.stop()

    def parse_syntax(self, sent):
        return next(self.parser.raw_parse(sent))

    def _collect_labels(self, node):
        """
        Collect labels in the given node recursively. This method should not
        be invoked directly; use collect_labels instead.
        """
        try:
            self.lab_result.append(node.label())
        except AttributeError:
            return
        for nn in node:
            self._collect_labels(nn)
        return

    def collect_labels(self, node):
        """
        Collect all labels in a tree starting from the given node.
        """
        self.lab_result = []  # used to collect labels in the recursion
        self._collect_labels(node)
        lab_counter = Counter(self.lab_result)

        # Keep the tags we have seen so far
        self.lab_set = self.lab_set.union(lab_counter.keys())

        return lab_counter

    def get_lab_series(self, lab_counter_list):
        """
        Convert and merge all lab_counters in the given list (the result of
        "collect_labels") into a series by using tags which have been seen so
        far (self.lab_set).
        """
        # Note: DataFrame.append was removed in pandas 2.0; this code assumes
        # an older pandas release where it is still available.
        rt = pd.DataFrame(columns=self.lab_set)
        for lab_counter in lab_counter_list:
            rt = rt.append(pd.Series(lab_counter, index=self.lab_set),
                           ignore_index=True)
        rt = rt.add_prefix('penn_')
        return rt.sum()
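# A hedged end-to-end sketch of the analyzer above; the sentence is an
# assumption. init_server() expects the CoreNLP distribution to sit at the
# relative path hard-coded in that method.
analyzer = CoreNLPSentenceAnalyzer()
analyzer.init_server()
tree = analyzer.parse_syntax('The quick brown fox jumps over the lazy dog.')
counts = analyzer.collect_labels(tree)    # Counter of Penn Treebank labels
print(analyzer.get_lab_series([counts]))  # summed per-label counts, 'penn_'-prefixed
analyzer.stop_server()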
import os

import nltk
from nltk.parse.corenlp import CoreNLPServer
from nltk.parse.corenlp import CoreNLPParser
from nltk.parse.corenlp import CoreNLPDependencyParser

STANFORD = "stanford-corenlp-full-2018-10-05"
jars = (
    os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
    os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
)

text = "turn right and go up the stairs and stand at the top."
# text = "Walk out of the closet and into the hallway. Walk through the hallway entrance on the left. Stop just inside the entryway."
# text = "Turn, putting the exit of the building on your left. Walk to the end of the entrance way and turn left. Travel across the kitchen area with the counter and chairs on your right. Continue straight until you reach the dining room. Enter the room and stop and wait one meter from the closest end of the long dining table."
print(text)

# Using the server as a context manager guarantees it is stopped on exit
with CoreNLPServer(*jars):
    parser = CoreNLPParser()
    for i in parser.parse_text(text):
        print(i)

    parser = CoreNLPDependencyParser()
    for i in parser.raw_parse(text):
        print(i)