def __init__(self):
    # Raw strings avoid invalid escape sequences in the Windows paths.
    os.environ["JAVAHOME"] = r"C:\Program Files\Java\jdk1.8.0_151\bin"

    jar_path = r"D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2.jar"
    model_path = r"D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2-models.jar"

    self.parser = stanford.StanfordDependencyParser(
        path_to_jar=jar_path, path_to_models_jar=model_path)

    print("Stanford Dependency Parser instantiated")
Example #2
    def __init__(self, host='http://localhost', port=9000):
        self.nlp = StanfordCoreNLP(
            host, port=port,
            timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)

        # self.nlp = StanfordCoreNLP(r'/Users/jovi/Desktop/CMU/NLP/project/stanford-corenlp-full-2018-02-27')
        self.nlp_depParser = stanford.StanfordDependencyParser()
        self.nlp_parser = stanford.StanfordParser()

        self.props = {
            'annotators': 'coref',
            'pipelineLanguage': 'en'  # ,
            # 'outputFormat': 'json'
        }
        self.url = "http://nlp02.lti.cs.cmu.edu:9000"
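A hedged usage sketch for the coref pipeline configured above; `annotate(text, properties=...)` follows the common CoreNLP Python client API, so treat the exact signature as an assumption:

# Hypothetical usage, given an instance `proc` of this class:
result = proc.nlp.annotate("Tom lost his keys. He found them later.",
                           properties=proc.props)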
Example #3
    def __init__(self):
        root_folder = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(root_folder, 'environ.yaml'), 'r') as f:
            env = yaml.safe_load(f)

        stanford_parser_folder = env['stanford_parser_folder']
        os.environ['STANFORD_PARSER'] = stanford_parser_folder
        os.environ['STANFORD_MODELS'] = stanford_parser_folder

        # Pickle files must be opened in binary mode.
        with open(os.path.join(root_folder, 'ioput_maxent_classifier.pickle'),
                  'rb') as f:
            self._maxent_classifier = cPickle.loads(f.read())

        self._tree_parser = stanford.StanfordParser(
            model_path=env['model_path'])
        self._dependency_parser = stanford.StanfordDependencyParser(
            model_path=env['model_path'])

        self._NPs = []
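For reference, a hypothetical environ.yaml matching the two keys read above:

# environ.yaml (hypothetical contents):
#   stanford_parser_folder: /path/to/stanford-parser-full-2018-10-17
#   model_path: edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz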
Example #4
def parse(sentence):
    os.environ["JAVAHOME"] = "C:\Program Files\Java\jdk1.8.0_151\\bin"

    jar_path = "D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2.jar"
    model_path = "D:\Stanford Core NLP\stanford-corenlp-full-2018-10-05\stanford-corenlp-3.9.2-models.jar"

    parser = stanford.StanfordDependencyParser(path_to_jar=jar_path,
                                               path_to_models_jar=model_path)

    sentences = parser.raw_parse(sentence)

    dep = next(sentences)
    parsed = list(dep.triples())

    # pprint(parsed)

    # export(parsed, "test_result.pickle")

    return parsed
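For reference, NLTK's DependencyGraph.triples() yields tuples shaped ((governor, gov_tag), relation, (dependent, dep_tag)); a hypothetical call, assuming the jar paths above exist on this machine:

triples = parse("The quick brown fox jumps over the lazy dog.")
# e.g. (('jumps', 'VBZ'), 'nsubj', ('fox', 'NN'))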
Example #5
def stanford_parse(sentence):
    parser = stanford.StanfordDependencyParser(
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    warnings.filterwarnings('error')
    sub_obj_list = []
    try:
        parse = parser.raw_parse(sentence)
        for sent in parse:
            #print(type(sent))
            #len(sent)
            for node in sent.triples():

                if node[1] in ('nsubj', 'dobj', 'nsubjpass'):
                    # node[2] is the (word, tag) pair of the dependent.
                    if node[2][1] in ('NNP', 'NNPS', 'NN', 'NNS'):
                        sub_obj_list.append(node[2])
    except Warning:
        print('Warning was raised as an exception!')

    return sub_obj_list
Example #6
    def get_dep_parser(cls):
        if cls._dep_parser is None:
            cls._dep_parser = stanford.StanfordDependencyParser(
                model_path=cls.PATH_TO_STANFORD + "englishPCFG.ser.gz")
        return cls._dep_parser
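A minimal sketch of the class context this classmethod implies; the class name, the placeholder path, and the @classmethod decorator are assumptions:

from nltk.parse import stanford

class ParserHolder:
    # Hypothetical container: caches one JVM-backed parser per process
    # so repeated calls reuse the same instance.
    PATH_TO_STANFORD = "/path/to/stanford/models/"  # placeholder
    _dep_parser = None

    @classmethod
    def get_dep_parser(cls):
        if cls._dep_parser is None:
            cls._dep_parser = stanford.StanfordDependencyParser(
                model_path=cls.PATH_TO_STANFORD + "englishPCFG.ser.gz")
        return cls._dep_parser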
Example #7
import os
from nltk.parse import stanford
from collections import Counter
import spacy

os.environ['STANFORD_PARSER'] = '../stanford-parser-full-2018-10-17/stanford-parser.jar'
os.environ['STANFORD_MODELS'] = '../stanford-parser-full-2018-10-17/stanford-parser-3.9.2-models.jar'
pcfg_parser = stanford.StanfordParser(model_path='../stanford-parser-full-2018-10-17/englishPCFG.ser.gz')
dep_parser = stanford.StanfordDependencyParser(model_path='../stanford-parser-full-2018-10-17/englishPCFG.ser.gz')

nlp = spacy.load('en_core_web_lg')

def stanford_parser(sentence):
    pcfg = next(pcfg_parser.raw_parse(sentence))
    dep = next(dep_parser.raw_parse(sentence))
    dep_list = list(dep.triples())
    return dep_list, pcfg

def stanford_pcfg(sentence):
    pcfg = next(pcfg_parser.raw_parse(sentence))
    return pcfg

def stanford_dep(sentence):
    dep = next(dep_parser.raw_parse(sentence))
    dep_list = list(dep.triples())
    return dep_list

# input should be a sentence
def Spacy_parser(sentence):
    # Hypothetical body (a minimal sketch): return (head, relation, token)
    # triples from spaCy's dependency parse, mirroring stanford_dep above.
    doc = nlp(sentence)
    return [(tok.head.text, tok.dep_, tok.text) for tok in doc]
Example #8
if __name__ == '__main__':
    if len(sys.argv) >= 3:
        input_file = sys.argv[1]
        output_file = sys.argv[2]
    else:
        input_file = 'data/atomic/train_intent_react.txt'
        output_file = 'data/atomic/train_svo.txt'

    # os.environ['STANFORD_PARSER'] = '/usr/local/Cellar/stanford-parser/3.9.1/libexec/stanford-parser.jar'
    # os.environ['STANFORD_MODELS'] = '/usr/local/Cellar/stanford-parser/3.9.1/libexec/stanford-parser-3.9.1-models.jar'
    os.environ['STANFORD_PARSER'] = '/users4/kliao/data/stanford_nlp/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = '/users4/kliao/data/stanford_nlp/stanford-parser-3.9.1-models.jar'
    parser = stanford.StanfordDependencyParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

    lines = open(input_file, 'r').readlines()
    lines = [line.strip().split(' | ') for line in lines]
    output_file = open(output_file, 'w')

    for line in lines:
        event, intent, react = line

        verb = None
        subj = None
        obj = None

        graph = next(parser.raw_parse(event))
        children_dict = {}
        for i in graph.nodes:
Example #9
    def __init__(self):
        self.parser = stanford.StanfordDependencyParser(
            model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
Example #10
File: parse.py  Project: Cheneng/SentC
from nltk.parse.stanford import StanfordDependencyParser
from nltk.parse import stanford
import nltk
import os
from dataset import del_mac_DS
from helper import *
from nltk.tokenize import casual_tokenize
import time

# Set the Stanford environment variables. Edit these paths by hand; the jar paths should be absolute.
dir_path = './stanford_parser_jar'
os.environ['STANFORD_PARSER'] = os.path.join(dir_path, 'stanford-parser.jar')
os.environ['STANFORD_MODELS'] = os.path.join(
    dir_path, 'stanford-parser-3.5.2-models.jar')

parser = stanford.StanfordDependencyParser(
    model_path=os.path.join(dir_path, 'englishPCFG.ser.gz'))


def get_the_parse(sent_list_in):
    tag_dict = {}
    sentences = parser.parse_sents([sent_list_in])
    tree = [parse.tree() for parse in list(sentences)[0]]
    n_leaves = len(tree[0].leaves())
    leavepos = list(tree[0].leaf_treeposition(n) for n in range(n_leaves))

    for pos in tree[0].treepositions():
        c_word = None
        if pos not in leavepos:
            c_word = tree[0][pos].label()
        else:
            c_word = tree[0][pos]
Example #11
import numpy as np
import re, os
import networkx as nx
from sklearn.svm import LinearSVC
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from nltk.parse import stanford

FILE_ENCODING = "utf8"
TRAINING_SIZE = 3000

os.environ['JAVAHOME'] = "C:/Program Files/Java/jdk1.8.0_40/bin"
os.environ['STANFORD_PARSER'] = '/jars'
os.environ['STANFORD_MODELS'] = '/jars'

parser = stanford.StanfordParser(model_path="/jars/englishPCFG.ser.gz")
dep_parser = stanford.StanfordDependencyParser(model_path="/jars/englishPCFG.ser.gz")

#===========================================================================
# parsed =   parser.raw_parse("Example sentence.")
# dep_parsed = dep_parser.raw_parse("Example sentence.")
#===========================================================================

# Classifier
clf = LinearSVC()

sentences = []
labels = []

def chunks(arr, n):
    for i in range(0, len(arr), n):
        yield arr[i:i+n]
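A quick sanity check of chunks with hypothetical values:

assert list(chunks([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]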
Example #12
import os
from nltk.parse import stanford
from nltk import word_tokenize
import nltk.data
from nltk.tag.stanford import StanfordNERTagger

#java_path = "C:\\Program Files\\Java\\jdk1.8.0_131\\bin\\java.exe"
#java_path = "C:\\Program Files\\Java\\jdk1.8.0_151\\bin\\java.exe"
java_path = "java.exe"
os.environ['JAVAHOME'] = java_path
os.environ['STANFORD_PARSER'] = 'jars\\stanford-parser.jar'
os.environ['STANFORD_MODELS'] = 'jars\\stanford-parser-3.9.2-models.jar'

parser = stanford.StanfordParser(model_path="englishPCFG.ser.gz")
dep_parser = stanford.StanfordDependencyParser(
    'jars\\stanford-parser.jar', 'jars\\stanford-parser-3.9.2-models.jar')
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
ner3 = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz',
                         'jars\\stanford-ner.jar')
ner7 = StanfordNERTagger('english.muc.7class.distsim.crf.ser.gz',
                         'jars\\stanford-ner.jar')
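A hedged usage sketch for the two taggers above, assuming the model files and stanford-ner.jar resolve at the given relative paths:

tokens = word_tokenize("Barack Obama visited Paris in May.")
print(ner3.tag(tokens))  # 3-class model: PERSON / ORGANIZATION / LOCATION
print(ner7.tag(tokens))  # 7-class model adds DATE, TIME, MONEY, PERCENT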


def sentence_parse(sentence):
    return parser.raw_parse(sentence)


def get_root_sub_obj(sentence):
    final_dependency = []
    result = dep_parser.raw_parse(sentence)
    parsetree = list(result)[0]
Example #13
# Hyper Parameters
DEP_VEC_LEN = 50
TRIGGER_CANDIDATE_LIST = [
    'NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'
]
# Set the Stanford environment variables. Edit these paths by hand; the jar paths should be absolute.
os.environ['STANFORD_PARSER'] = '/home/sfdai/jars/stanford-parser.jar'
os.environ['STANFORD_MODELS'] = '/home/sfdai/jars/stanford-parser-3.8.0-models.jar'

# Set the JAVAHOME environment variable
java_path = "/usr/lib/jvm/java-8-oracle/jre/bin/java"
os.environ['JAVAHOME'] = java_path
dependency_parser = stanford.StanfordDependencyParser(
    model_path="/home/sfdai/stanford-parser-full-2017-06-09/stanford-parser-3.8.0-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
)

word_not_vec_dic = {
    "'s": "0",
    "n't": "not",
    "'re": "are",
    "'ve": "have",
    "'ll": "will",
    "'m": "am",
    "'d": "1"
}
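The map above presumably normalizes clitic tokens that lack word vectors; a hypothetical lookup:

token = word_not_vec_dic.get("n't", "n't")  # -> "not"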


def process_sentence(raw_sentence):
    sentences = dependency_parser.raw_parse(raw_sentence)