def dependency_parse(raw_data):
    import os
    from nltk.parse.corenlp import CoreNLPServer

    # The server needs to know the location of the following files:
    # - stanford-corenlp-X.X.X.jar
    # - stanford-corenlp-X.X.X-models.jar
    STANFORD = os.path.join("..", "stanford-corenlp-full-2020-04-20")

    # Create the server
    server = CoreNLPServer(
        os.path.join(STANFORD, "stanford-corenlp-4.0.0.jar"),
        os.path.join(STANFORD, "stanford-corenlp-4.0.0-models.jar"),
    )

    # Start the server in the background
    server.start()

    from nltk.parse import CoreNLPParser
    parser = CoreNLPParser()

    new_data = []
    for example in raw_data:
        sentence, features_seq = example[0], example[-1]
        parse = next(parser.raw_parse(sentence))
        # get a few "important" neighboring words
        new_data.append((sentence, parse, features_seq))

    server.stop()
    return new_data
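# A hypothetical driver for dependency_parse above: `raw_data` is assumed to
# be a list of (sentence, ..., features_seq) tuples, and the jar paths inside
# the function must point at a real local CoreNLP download. The sample data
# here is illustrative only.
if __name__ == "__main__":
    sample = [("The quick brown fox jumps over the lazy dog.", ["O"] * 9)]
    for sentence, tree, feats in dependency_parse(sample):
        tree.pretty_print()  # print the nltk.Tree constituency parse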
def start_core_nlp_server(self):
    home = os.path.expanduser("~")
    if os.name == 'nt':
        java_path = "C:\\Program Files\\Java\\jdk1.8.0_201\\bin\\java.exe"
        download_path = os.path.join(home, "Downloads")
        STANFORD_HOME = os.path.join(download_path, "stanford-corenlp-full-2018-10-05")
    else:  # 'posix'
        java_path = "/usr/lib/jvm/java-8-oracle/"
        download_path = os.path.join(home, "ttp_sense_python")
        STANFORD_HOME = os.path.join(download_path, "lib")
    print('Stanford_Directory: ', STANFORD_HOME)
    os.environ['JAVAHOME'] = java_path

    # The server needs to know the location of the following files:
    # - stanford-corenlp-X.X.X.jar
    # - stanford-corenlp-X.X.X-models.jar
    # Create the server. CoreNLPServer expects the main jar first and the
    # models jar second; the original call had them swapped and passed the
    # English models jar where the `verbose` flag belongs.
    server = CoreNLPServer(
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2.jar"),
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2-models.jar"),
    )

    # Start the server in the background
    server.start()
    print("Server Started")
def setup(manageServerInternally=False):
    global server
    config['isManagingServer'] = manageServerInternally

    if manageServerInternally:
        print("Starting CoreNLP server...")
        server = CoreNLPServer(
            os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
        )
        server.start()
    else:
        try:
            print("Checking connection to CoreNLP server...")
            requests.get(f'{config["coreNLPServerURL"]}/live')
        except BaseException:
            print(
                "Error connecting to CoreNLP instance! Make sure the server is running in the background."
            )
            print("The relevant command can be found in the README.")
            exit(1)

    setupQANet()
class CorenlpSubprocWordSplitter(CorenlpRemoteWordSplitter):
    """
    A ``WordSplitter`` that uses CoreNLP's tokenizer.
    It starts ``corenlp-server`` as a sub-process and calls its Web API.
    """

    def __init__(
        self,
        path_to_jar: str = None,
        path_to_models_jar: str = None,
        verbose: bool = False,
        java_options: str = None,
        corenlp_options: str = None,
        port: int = None,
        encoding: str = 'utf8',
    ):
        """
        Parameters
        ----------
        * For parameters from ``path_to_jar`` to ``port``, see
          https://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.
        * For the ``encoding`` parameter, see
          https://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.CoreNLPParser
        """
        self._server = CoreNLPServer(path_to_jar, path_to_models_jar, verbose,
                                     java_options, corenlp_options, port)
        self._server.start()
        super().__init__(self._server.url, encoding)

    def __del__(self):
        self._server.stop()
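# A minimal usage sketch for CorenlpSubprocWordSplitter, assuming the parent
# CorenlpRemoteWordSplitter exposes an AllenNLP-style `split_words` method and
# that the jar paths below (placeholders) point at a local CoreNLP download.
splitter = CorenlpSubprocWordSplitter(
    path_to_jar="/opt/corenlp/stanford-corenlp-3.9.2.jar",
    path_to_models_jar="/opt/corenlp/stanford-corenlp-3.9.2-models.jar",
)
tokens = splitter.split_words("Stanford CoreNLP tokenizes this sentence.")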
def startServer(self):
    java_path = "C:\\Program Files\\Java\\jdk1.8.0_201\\bin\\java.exe"
    os.environ['JAVAHOME'] = java_path
    home = os.path.expanduser("~")
    download_path = os.path.join(home, "Downloads")
    print(download_path)

    # The server needs to know the location of the following files:
    # - stanford-corenlp-X.X.X.jar
    # - stanford-corenlp-X.X.X-models.jar
    STANFORD = os.path.join(download_path, "stanford-corenlp-full-2018-10-05")

    # Create the server. CoreNLPServer expects the main jar first and the
    # models jar second; the original call had them swapped and passed a
    # third, extension-less models path where the `verbose` flag belongs.
    server = CoreNLPServer(
        os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
        os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
    )

    # Start the server in the background
    server.start()
    print("Server Started")

    self.stanfordCoreNLP = StanfordCoreNLP('http://localhost:9000')
    return self.stanfordCoreNLP
import time

from nltk.parse.corenlp import CoreNLPServer

import config


def server():
    print('Starting CoreNLP server...')
    serv = CoreNLPServer(path_to_jar=config.CORENLP_JAR,
                         path_to_models_jar=config.CORENLP_MODELS_JAR)
    try:
        serv.start()
        print('Server started.')
        # Keep the process alive until interrupted (sleep instead of a busy loop)
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        print(e)
    finally:
        print('Stopping server...')
        serv.stop()
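# Sketch of running the helper above as a standalone entry point; `config` is
# assumed to be a project module defining CORENLP_JAR and CORENLP_MODELS_JAR.
if __name__ == '__main__':
    server()  # blocks until Ctrl+C, then shuts the server down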
class CoreNLP:
    def __init__(self, args):
        self.context = dict()
        self.server = None
        self.set_system_env(*args)

    def set_system_env(self, *args):
        idx = 1
        while idx < len(args):
            if args[idx] == '--stanford':
                idx += 1
                stanford_path = args[idx]
                self.context['path_to_jar'] = os.path.join(
                    stanford_path, 'stanford-corenlp-3.9.2.jar')
                self.context['path_to_models_jar'] = os.path.join(
                    stanford_path, 'stanford-corenlp-3.9.2-models.jar')
                print('corenlp jar:', self.context['path_to_jar'])
                print('corenlp models jar:', self.context['path_to_models_jar'])
            elif args[idx] == '--java':
                idx += 1
                java_path = args[idx]
                os.environ['JAVAHOME'] = java_path
                print('java path:', java_path)
            idx += 1

    def start_server(self):
        self.server = CoreNLPServer(**self.context)
        self.server.start()

    def stop_server(self):
        self.server.stop()

    def parse_tree(self, s):
        parser = CoreNLPParser()
        parse = next(parser.raw_parse(s))
        # parse.draw()
        return parse

    def dependency_parse_tree(self, s):
        parser = CoreNLPDependencyParser()
        parse = next(parser.raw_parse(s))
        return parse
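# Illustrative invocation of the CoreNLP wrapper above. The argument list
# mimics sys.argv (index 0 is skipped by set_system_env), and both paths are
# placeholders for a local install.
nlp = CoreNLP(['prog', '--stanford', '/opt/corenlp', '--java', '/usr/bin/java'])
nlp.start_server()
tree = nlp.parse_tree('The cat sat on the mat.')
graph = nlp.dependency_parse_tree('The cat sat on the mat.')
nlp.stop_server()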
def start_CoreNLPServer(self):
    url = 'http://localhost:9000'
    status_code = 0
    try:
        status_code = urllib.request.urlopen(url).getcode()
    except Exception:
        pass

    if status_code != 200:
        print('CoreNLPServer is starting {}'.format(url))
        try:
            os.environ['CLASSPATH'] = self.model_path
            server = CoreNLPServer(port=9000)
            server.start()
            status_code = urllib.request.urlopen(url).getcode()
            print('server started {}'.format(status_code))
        except Exception as e:
            print(url, e)
            raise
def start_core_nlp_server(self):
    os.environ['JAVAHOME'] = self.JAVA_HOME
    HOMEDIR = os.path.expanduser("~")
    DOWNLOAD_HOME = os.path.join(HOMEDIR, self.DOWNLOAD_HOME)
    STANFORD_HOME = os.path.join(DOWNLOAD_HOME, self.STANFORD_HOME)
    print('Stanford_Directory: ', STANFORD_HOME)

    # The server needs to know the location of the following files:
    # - stanford-corenlp-X.X.X.jar
    # - stanford-corenlp-X.X.X-models.jar
    # Create the server. CoreNLPServer expects the main jar first and the
    # models jar second; the original call had them swapped and passed the
    # English models jar where the `verbose` flag belongs.
    server = CoreNLPServer(
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2.jar"),
        os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2-models.jar"),
    )

    # Start the server in the background
    server.start()
    print("Server Started")
def setup(self):
    url = settings.CORENLP_URL
    if url is None:
        server = CoreNLPServer(
            settings.CORENLP_PATH,
            settings.CORENLP_MODEL_PATH,
        )
        server.start()
        self.server = server
        url = server.url
    else:
        print("[TreeParser] Using existing CoreNLP Server...")

    self.parser = CoreNLPParser(url=url)
    # maybe separate this into another class...
    self.dependency_parser = CoreNLPDependencyParser(url=url)
    return self.parser
from nltk.stem import SnowballStemmer

from clean_data import process_sentence

# 2017-12-03: using a different parser to parse sentences
'''
from nltk.parse.stanford import StanfordDependencyParser
path_to_jar = '/Users/collin/stanford/stanford-parser-full-2017-06-09/stanford-parser.jar'
path_to_models_jar = '/Users/collin/stanford/stanford-parser-full-2017-06-09/stanford-parser-3.8.0-models.jar'
dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
'''
from nltk.parse.corenlp import CoreNLPServer, CoreNLPDependencyParser

path_to_jar = '/Users/collin/stanford/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
path_to_models_jar = '/Users/collin/stanford/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'
server = CoreNLPServer(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
server.start()
dependency_parser = CoreNLPDependencyParser()

stemmer = SnowballStemmer('english')


def stem(w):
    return stemmer.stem(w)


# Dependency relation labels grouped for later filtering
DR_one = ['nsubj', 'dobj', 'xsubj', 'csubj', 'nmod', 'iobj', 'xcomp']
DR_two = ['amod']
# DR_two = ['nsubj', 'dobj', 'xsubj', 'csubj', 'nsubjpass', 'nmod', 'iobj']
DR_three = ['conj']
DR = DR_one + DR_three
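# Hedged example of how the DR relation list above might be used to filter
# dependency triples from the parser; the sentence is illustrative only.
parse = next(dependency_parser.raw_parse('The movie has a great plot.'))
pairs = [(stem(gov[0]), rel, stem(dep[0]))
         for gov, rel, dep in parse.triples() if rel in DR]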
import os
from collections import Counter

import pandas as pd
from nltk.parse.corenlp import CoreNLPParser, CoreNLPServer


class CoreNLPSentenceAnalyzer():
    """
    A sentence analyzer based on Stanford CoreNLP.

    References:
        The CoreNLP Syntax Parser
        https://bbengfort.github.io/snippets/2018/06/22/corenlp-nltk-parses.html
        Penn Treebank II Tags
        https://gist.github.com/nlothian/9240750
    """

    def __init__(self):
        self.lab_set = set()

    def init_server(self):
        STANFORD = os.path.join("stanford-corenlp-full-2018-10-05")
        self.server = CoreNLPServer(
            os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"))
        self.server.start()
        self.parser = CoreNLPParser()

    def stop_server(self):
        self.server.stop()

    def parse_syntax(self, sent):
        return next(self.parser.raw_parse(sent))

    def _collect_labels(self, node):
        """
        Collect labels in the given node recursively. This method should not
        be invoked directly but through collect_labels.
        """
        try:
            self.lab_result.append(node.label())
        except AttributeError:
            return
        for nn in node:
            self._collect_labels(nn)

    def collect_labels(self, node):
        """
        Collect all labels in a tree starting from the given node.
        """
        self.lab_result = []  # used to collect labels in the recursion
        self._collect_labels(node)
        lab_counter = Counter(self.lab_result)

        # Keep the tags we have seen so far
        self.lab_set = self.lab_set.union(lab_counter.keys())

        return lab_counter

    def get_lab_series(self, lab_counter_list):
        """
        Convert and merge all lab_counters in the given list (the result of
        "collect_labels") into a series, using the tags which have been seen
        so far (self.lab_set).
        """
        cols = sorted(self.lab_set)  # a set has no stable order; sort for columns
        rows = [pd.Series(lab_counter, index=cols) for lab_counter in lab_counter_list]
        # Build the frame in one go; DataFrame.append was removed in pandas 2.0
        rt = pd.DataFrame(rows, columns=cols)
        rt = rt.add_prefix('penn_')
        return rt.sum()
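# End-to-end sketch for CoreNLPSentenceAnalyzer above, assuming the CoreNLP
# distribution sits in ./stanford-corenlp-full-2018-10-05 as init_server
# expects; the sentence is illustrative only.
analyzer = CoreNLPSentenceAnalyzer()
analyzer.init_server()
tree = analyzer.parse_syntax('Colorless green ideas sleep furiously.')
counts = analyzer.collect_labels(tree)   # Counter of Penn Treebank labels
penn_features = analyzer.get_lab_series([counts])
analyzer.stop_server()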
import logging

from nltk.parse.corenlp import (CoreNLPDependencyParser, CoreNLPServer,
                                CoreNLPServerError)
# NLTK's CoreNLP client issues its API calls through `requests`, so a failed
# call surfaces as requests.exceptions.HTTPError.
from requests.exceptions import HTTPError


class Summarizer:
    """
    Summarizer class implementing opinion-feature extraction.
    Uses the Stanford CoreNLP dependency parser.

    Attributes:
        server (CoreNLPServer): CoreNLP server for accessing Stanford CoreNLP services.
        parser (CoreNLPDependencyParser): CoreNLP dependency parser.
    """

    def __init__(self, jar_path, models_jar_path):
        """
        The constructor for the Summarizer class.

        Parameters:
            jar_path (str): Filepath to the Stanford CoreNLP .jar file.
            models_jar_path (str): Filepath to the Stanford CoreNLP models .jar file.
        """
        logging.info('Starting CoreNLP server...')
        self.server = CoreNLPServer(path_to_jar=jar_path,
                                    path_to_models_jar=models_jar_path)
        try:
            self.server.start()
            logging.info('CoreNLP server started.')
        # CoreNLPServerError is thrown when a server is already running
        except CoreNLPServerError:
            logging.warning('CoreNLP server is already running.')
        self.parser = CoreNLPDependencyParser()

    def summarize(self, text):
        """
        Summarizes a review by extracting opinion-feature pairs from it.

        Parameters:
            text (str): Review text.

        Returns:
            Summary: List of opinion-feature pairs extracted from the review text.
        """
        try:
            parse = next(self.parser.raw_parse(text))
        # An HTTPError raised by the CoreNLP server is related to unrecognized
        # characters in the review text
        except HTTPError:
            logging.warning(f'Review skipped: {text}')
            return []

        # Search the dependency parsing result for "nsubj" or "amod" relations
        summary = list()
        for governor, dep, dependent in parse.triples():
            if dep == 'nsubj':
                # Check that the nominal subject is a noun modified by an adjective
                if governor[1] == 'JJ' and dependent[1] in {'NN', 'NNS'}:
                    summary.append((governor[0].lower(), dependent[0].lower()))
            elif dep == 'amod':
                # Check that the adjective is linked to a noun
                if dependent[1] == 'JJ' and governor[1] in {'NN', 'NNS'}:
                    summary.append((dependent[0].lower(), governor[0].lower()))

        return summary

    def stop(self):
        """
        Stops the CoreNLP server of the summarizer object.
        """
        self.server.stop()
        logging.info('CoreNLP server stopped.')
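# Assumed usage of Summarizer above; both jar paths are placeholders for a
# local CoreNLP 3.9.2 download, and the review text is illustrative only.
summarizer = Summarizer('/opt/corenlp/stanford-corenlp-3.9.2.jar',
                        '/opt/corenlp/stanford-corenlp-3.9.2-models.jar')
pairs = summarizer.summarize('The battery life is great but the screen is dim.')
summarizer.stop()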