def __init__(self):
    """Build the Stanford dependency parser from the local CoreNLP jars.

    Sets the ``JAVAHOME`` environment variable, creates
    ``self.parser`` from the hard-coded jar and models-jar locations, and
    prints a confirmation message.
    """
    # Raw strings: the previous literals depended on unrecognised escape
    # sequences (``\P``, ``\J``, ``\S`` ...) being kept verbatim, which is
    # deprecated in Python 3. The resulting path values are unchanged.
    os.environ["JAVAHOME"] = r"C:\Program Files\Java\jdk1.8.0_151\bin"
    jar_path = r"D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2.jar"
    model_path = r"D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2-models.jar"
    self.parser = stanford.StanfordDependencyParser(
        path_to_jar=jar_path, path_to_models_jar=model_path)
    print("Standford Dependency Parser instantiated")
def __init__(self, host='http://localhost', port=9000):
    """Create the CoreNLP client, the two NLTK Stanford parsers and defaults.

    Args:
        host: First positional argument handed to ``StanfordCoreNLP``.
        port: Forwarded to ``StanfordCoreNLP`` as ``port``.
    """
    corenlp_client = StanfordCoreNLP(host, port=port, timeout=30000)
    self.nlp = corenlp_client

    # NLTK wrappers, both constructed with their default arguments.
    self.nlp_depParser = stanford.StanfordDependencyParser()
    self.nlp_parser = stanford.StanfordParser()

    # Annotation properties stored for later use ('outputFormat' is
    # intentionally not set).
    self.props = dict(annotators='coref', pipelineLanguage='en')
    self.url = "http://nlp02.lti.cs.cmu.edu:9000"
def __init__(self):
    """Load the configuration, the pickled classifier and both parsers.

    Reads ``environ.yaml`` and ``ioput_maxent_classifier.pickle`` from the
    directory that contains this source file, exports the configured
    Stanford parser folder through ``STANFORD_PARSER`` and
    ``STANFORD_MODELS``, and creates the tree and dependency parsers from
    the configured ``model_path``.

    Raises:
        IOError/OSError: if either file cannot be opened.
        KeyError: if ``stanford_parser_folder`` or ``model_path`` is
            missing from the YAML configuration.
    """
    root_folder = os.path.dirname(os.path.realpath(__file__))

    with open(os.path.join(root_folder, 'environ.yaml'), 'r') as f:
        # safe_load only builds plain Python objects; a bare yaml.load()
        # can construct arbitrary objects and is rejected without an
        # explicit Loader by recent PyYAML releases.
        env = yaml.safe_load(f)

    stanford_parser_folder = env['stanford_parser_folder']
    os.environ['STANFORD_PARSER'] = stanford_parser_folder
    os.environ['STANFORD_MODELS'] = stanford_parser_folder

    # Pickle data is binary, so the file must be opened in 'rb' mode.
    classifier_path = os.path.join(root_folder,
                                   'ioput_maxent_classifier.pickle')
    with open(classifier_path, 'rb') as f:
        # NOTE(review): unpickling executes code embedded in the file; only
        # ship a classifier pickle from a trusted source.
        self._maxent_classifier = cPickle.load(f)

    self._tree_parser = stanford.StanfordParser(
        model_path=env['model_path'])
    self._dependency_parser = stanford.StanfordDependencyParser(
        model_path=env['model_path'])
    self._NPs = []
def parse(sentence):
    """Dependency-parse *sentence* and return the triples of the first parse.

    Sets the ``JAVAHOME`` environment variable and builds a new
    ``StanfordDependencyParser`` from hard-coded jar locations on every
    call.

    Args:
        sentence: Text passed to the parser's ``raw_parse``.

    Returns:
        A list holding everything yielded by ``triples()`` on the first
        parse result.

    Raises:
        StopIteration: if the parser yields no result at all.
    """
    # Raw strings: the previous literals depended on unrecognised escape
    # sequences (``\P``, ``\J``, ``\S`` ...) being kept verbatim, which is
    # deprecated in Python 3. The resulting path values are unchanged.
    os.environ["JAVAHOME"] = r"C:\Program Files\Java\jdk1.8.0_151\bin"
    jar_path = r"D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2.jar"
    model_path = r"D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2-models.jar"
    parser = stanford.StanfordDependencyParser(path_to_jar=jar_path,
                                               path_to_models_jar=model_path)
    # next() is the supported way to advance an iterator (instead of
    # calling __next__ directly).
    dep = next(parser.raw_parse(sentence))
    return list(dep.triples())
def stanford_parse(sentence):
    """Collect the noun subjects and objects found in *sentence*.

    Every triple of every parse is inspected; a triple is kept when it
    contains one of the relations ``nsubj``, ``dobj`` or ``nsubjpass`` and
    its third element contains one of the noun tags ``NNP``, ``NNPS``,
    ``NN`` or ``NNS``.

    Args:
        sentence: Text passed to the parser's ``raw_parse``.

    Returns:
        The list of matching third elements (``node[2]``) in parse order.
        Presumably each is a ``(word, tag)`` pair - confirm against the
        NLTK ``triples()`` documentation. If a warning is raised while
        parsing, a message is printed and the items gathered so far are
        returned.
    """
    parser = stanford.StanfordDependencyParser(
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    # NOTE(review): this changes the warning filters for the whole process
    # and is never undone; kept because callers may rely on it.
    warnings.filterwarnings('error')

    relations = ('nsubj', 'dobj', 'nsubjpass')
    noun_tags = ('NNP', 'NNPS', 'NN', 'NNS')
    sub_obj_list = []
    try:
        for sent in parser.raw_parse(sentence):
            for node in sent.triples():
                if not any(rel in node for rel in relations):
                    continue
                dependent = node[2]
                # The previous test, ('NNP' or 'NNPS' or 'NN' or 'NNS') in
                # node[2], collapsed to 'NNP' in node[2] and ignored the
                # other three tags.
                if any(tag in dependent for tag in noun_tags):
                    sub_obj_list.append(dependent)
    except Warning:
        # Call form prints the same text on Python 2 and Python 3.
        print('Warning was raised as an exception!')
    return sub_obj_list
def get_dep_parser(cls):
    """Return the dependency parser cached on *cls*, creating it on first use.

    The parser is built from ``cls.PATH_TO_STANFORD + "englishPCFG.ser.gz"``
    and stored in ``cls._dep_parser`` so later calls reuse it.
    """
    cached = cls._dep_parser
    if cached is not None:
        return cached

    model_file = cls.PATH_TO_STANFORD + "englishPCFG.ser.gz"
    cls._dep_parser = stanford.StanfordDependencyParser(model_path=model_file)
    return cls._dep_parser
import os
from nltk.parse import stanford
from collections import Counter
import spacy

# Stanford parser jar and models jar locations, exported through the
# environment before the parsers below are constructed.
os.environ['STANFORD_PARSER'] = '../stanford-parser-full-2018-10-17/stanford-parser.jar'
os.environ['STANFORD_MODELS'] = '../stanford-parser-full-2018-10-17/stanford-parser-3.9.2-models.jar'

# Module-level parsers shared by the helper functions; both are given the
# same englishPCFG model file.
pcfg_parser = stanford.StanfordParser(model_path='../stanford-parser-full-2018-10-17/englishPCFG.ser.gz')
dep_parser = stanford.StanfordDependencyParser(model_path='../stanford-parser-full-2018-10-17/englishPCFG.ser.gz')
# spaCy pipeline, loaded once at import time.
nlp = spacy.load('en_core_web_lg')


def stanford_parser(sentence):
    """Parse *sentence* with both Stanford parsers.

    Returns a ``(dep_list, pcfg)`` tuple: the list of triples from the first
    dependency parse, and the first result of the PCFG parser.
    """
    pcfg = pcfg_parser.raw_parse(sentence).__next__()
    dep = dep_parser.raw_parse(sentence).__next__()
    dep_list = list(dep.triples())
    return dep_list, pcfg


def stanford_pcfg(sentence):
    """Return the first result of the PCFG parser for *sentence*."""
    pcfg = pcfg_parser.raw_parse(sentence).__next__()
    return pcfg


def stanford_dep(sentence):
    """Return the list of triples from the first dependency parse of *sentence*."""
    dep = dep_parser.raw_parse(sentence).__next__()
    dep_list = list(dep.triples())
    return dep_list


#input should be a sentence
def Spacy_parser(sentence):
    #from tabulate import tabulate
if __name__ == '__main__':
    # Input and output paths come from the first two command-line arguments
    # when both are given; otherwise the defaults under data/atomic/ are used.
    if len(sys.argv) >= 3:
        input_file = sys.argv[1]
        output_file = sys.argv[2]
    else:
        input_file = 'data/atomic/train_intent_react.txt'
        output_file = 'data/atomic/train_svo.txt'
    # os.environ['STANFORD_PARSER'] = '/usr/local/Cellar/stanford-parser/3.9.1/libexec/stanford-parser.jar'
    # os.environ['STANFORD_MODELS'] = '/usr/local/Cellar/stanford-parser/3.9.1/libexec/stanford-parser-3.9.1-models.jar'
    os.environ[
        'STANFORD_PARSER'] = '/users4/kliao/data/stanford_nlp/stanford-parser.jar'
    os.environ[
        'STANFORD_MODELS'] = '/users4/kliao/data/stanford_nlp/stanford-parser-3.9.1-models.jar'
    parser = stanford.StanfordDependencyParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
    # Read every input line and split it on ' | ' into its fields.
    lines = open(input_file, 'r').readlines()
    lines = [line.strip().split(' | ') for line in lines]
    # NOTE: output_file is rebound from the path string to the open handle.
    output_file = open(output_file, 'w')
    for line in lines:
        # Each line must unpack into exactly three fields.
        event, intent, react = line
        verb = None
        subj = None
        obj = None
        # Only the first dependency graph produced for the event is used.
        graph = next(parser.raw_parse(event))
        children_dict = {}
        for i in graph.nodes:
def __init__(self):
    """Create the Stanford dependency parser and store it on ``self.parser``."""
    english_pcfg_model = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    self.parser = stanford.StanfordDependencyParser(
        model_path=english_pcfg_model)
from nltk.parse.stanford import StanfordDependencyParser
from nltk.parse import stanford
import nltk
import os
from dataset import del_mac_DS
from helper import *
from nltk.tokenize import casual_tokenize
import time

# Add the Stanford environment variables. This must be edited by hand; the
# jar paths are meant to be absolute paths.
dir_path = './stanford_parser_jar'
os.environ['STANFORD_PARSER'] = os.path.join(dir_path, 'stanford-parser.jar')
os.environ['STANFORD_MODELS'] = os.path.join(
    dir_path, 'stanford-parser-3.5.2-models.jar')
# Module-level dependency parser using the englishPCFG model in dir_path.
parser = stanford.StanfordDependencyParser(
    model_path=os.path.join(dir_path, 'englishPCFG.ser.gz'))


def get_the_parse(sent_list_in):
    tag_dict = {}
    # The input is wrapped in a list, so exactly one sentence is parsed.
    sentences = parser.parse_sents([sent_list_in])
    # Convert every parse returned for that sentence into a tree.
    tree = [parse.tree() for parse in list(sentences)[0]]
    # Only the first tree is used below.
    n_leaves = len(tree[0].leaves())
    # Tree positions of all leaves, used to tell leaves from inner nodes.
    leavepos = list(tree[0].leaf_treeposition(n) for n in range(n_leaves))
    for pos in tree[0].treepositions():
        c_word = None
        if pos not in leavepos:
            # Inner node: take its label.
            c_word = tree[0][pos].label()
        else:
            # Leaf: take the leaf value itself.
            c_word = tree[0][pos]
import numpy as np
import re
import os
import networkx as nx
from sklearn.svm import LinearSVC
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from nltk.parse import stanford

FILE_ENCODING = "utf8"
TRAINING_SIZE = 3000

# Java and Stanford jar locations, exported before the parsers are created.
os.environ.update({
    'JAVAHOME': "C:/Program Files/Java/jdk1.8.0_40/bin",
    'STANFORD_PARSER': '/jars',
    'STANFORD_MODELS': '/jars',
})

# Constituency and dependency parsers, both given the same model file.
parser = stanford.StanfordParser(
    model_path="/jars/englishPCFG.ser.gz")
dep_parser = stanford.StanfordDependencyParser(
    model_path="/jars/englishPCFG.ser.gz")

# Usage example (kept disabled):
#   parsed = parser.raw_parse("Example sentence.")
#   dep_parsed = dep_parser.raw_parse("Example sentence.")

# Classifier
clf = LinearSVC()

sentences, labels = [], []


def chunks(arr, n):
    """Yield consecutive slices of *arr*, each holding at most *n* items."""
    for offset in range(0, len(arr), n):
        yield arr[offset:offset + n]
import os
from nltk.parse import stanford
from nltk import word_tokenize
import nltk.data
from nltk.tag.stanford import StanfordNERTagger

#java_path = "C:\\Program Files\\Java\\jdk1.8.0_131\\bin\\java.exe"
#java_path = "C:\\Program Files\\Java\\jdk1.8.0_151\\bin\\java.exe"
# Bare executable name; the absolute paths above were left disabled.
java_path = "java.exe"
os.environ['JAVAHOME'] = java_path
# Parser jar and models jar, given relative to the working directory.
os.environ['STANFORD_PARSER'] = 'jars\\stanford-parser.jar'
os.environ['STANFORD_MODELS'] = 'jars\\stanford-parser-3.9.2-models.jar'

# Constituency parser and dependency parser shared by the functions below.
parser = stanford.StanfordParser(model_path="englishPCFG.ser.gz")
dep_parser = stanford.StanfordDependencyParser(
    'jars\\stanford-parser.jar', 'jars\\stanford-parser-3.9.2-models.jar')
# Punkt sentence tokenizer loaded from the NLTK data files.
tokenizer = nltk.data.load('tokenizers\\punkt\\english.pickle')
# Two NER taggers using the 3-class and 7-class model files.
ner3 = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz',
                         'jars\\stanford-ner.jar')
ner7 = StanfordNERTagger('english.muc.7class.distsim.crf.ser.gz',
                         'jars\\stanford-ner.jar')


def sentence_parse(sentence):
    """Return the result of ``parser.raw_parse`` for *sentence*, unmodified."""
    return parser.raw_parse(sentence)


def get_root_sub_obj(sentence):
    final_dependency = []
    result = dep_parser.raw_parse(sentence)
    # Materialise the parses and keep only the first one.
    parsetree = list(result)[0]
# Hyper Parameters DEP_VEC_LEN = 50 TRIGGER_CANDIDATE_LIST = [ 'NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ' ] # 添加stanford环境变量,此处需要手动修改,jar包地址为绝对地址。 os.environ['STANFORD_PARSER'] = '/home/sfdai/jars/stanford-parser.jar' os.environ[ 'STANFORD_MODELS'] = '/home/sfdai/jars/stanford-parser-3.8.0-models.jar' # 为JAVAHOME添加环境变量 java_path = "/usr/lib/jvm/java-8-oracle/jre/bin/java" os.environ['JAVAHOME'] = java_path dependency_parser = stanford.StanfordDependencyParser( model_path= "/home/sfdai/stanford-parser-full-2017-06-09/stanford-parser-3.8.0-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ) word_not_vec_dic = { "'s": "0", "n't": "not", "'re": "are", "'ve": "have", "'ll": "will", "'m": "am", "'d": "1" } def process_sentence(raw_sentence): sentences = dependency_parser.raw_parse(raw_sentence)