예제 #1
0
    def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
        """Set up the corpus directory layout and the requested parser.

        datasets_path -- root folder holding 'raw' and 'processed' data
        corpus_name   -- corpus directory name under <datasets_path>/raw
        parse_type    -- 'parse' (constituency trees) or 'props' (ClausIE)
        lang          -- 'english' (default) or 'german'
        """
        self.datasets_path = datasets_path
        self.corpus_name = corpus_name
        self.corpus_path = path.join(datasets_path, 'raw', corpus_name)

        # Fixed corpus layout: source docs, topic file, (summary) models.
        self.docs_path = path.join(self.corpus_path, "docs")
        self.topics_file = path.join(self.corpus_path, "topics.xml")
        self.models_path = path.join(self.corpus_path, "models")
        self.smodels_path = path.join(self.corpus_path, "smodels")

        # Stanford tools are loaded from the project's jar folder.
        self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
        os.environ['CLASSPATH'] = self.jar_path

        self.cleaned_path = path.join(datasets_path, "processed")

        if parse_type == 'parse':
            if lang == 'english':
                self.parser = stanford.StanfordParser(
                    model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
            elif lang == 'german':
                self.parser = stanford.StanfordParser(
                    model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
        if parse_type == 'props':  # TODO
            if lang == 'english':
                self.props_parser = ClausIE.get_instance()
            elif lang == 'german':
                # NOTE(review): this assigns self.parser, not self.props_parser
                # -- confirm that is intended for the German 'props' path.
                self.parser = stanford.StanfordParser(
                    model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
            self.cleaned_path = path.join(datasets_path, "processed.props")

        self.add_dataset_index(lang)
예제 #2
0
파일: tree.py 프로젝트: steve3p0/LING511
def parse(sentence, parser, request_formats):
    """Parse *sentence* and assemble the output formats named in
    *request_formats*.

    sentence        -- raw sentence string
    parser          -- "pdx" (PSU-format tree) or "stanford" (raw Stanford tree)
    request_formats -- iterable of format names: "tree_image", "tree_ascii",
                       "bracket_diagram", "tree_str"

    Returns a dict with the sentence, the parser name and a
    'response_formats' dict keyed by the requested format names.

    Raises ValueError for an unknown *parser* value.
    """
    tree_str = ""

    if parser == "pdx":
        # Default PSU parser: parse with Stanford first, then convert to the
        # PSU format (tense not required).
        stanford_parser = stanford.StanfordParser(model_path=model_path)
        psu_tree = Tree(parser=stanford_parser)
        tree = psu_tree.parse_sentence(sentence, require_tense=False)
    elif parser == "stanford":
        stanford_parser = stanford.StanfordParser(model_path=model_path)
        tree = next(stanford_parser.raw_parse(sentence))
    else:
        # Bug fix: previously an unknown parser name left `tree` unbound and
        # the function crashed later with a NameError.
        raise ValueError("unknown parser: %r" % (parser,))

    print(tree_str)

    # Dictionary of rendered parse formats to return.
    response_formats = {}

    if "tree_image" in request_formats:
        print("do image thingy")
        # Render the tree to an image and return it base64-encoded as ASCII.
        psu_tree = Tree()
        img_byte_arr = psu_tree.write_tree_stream(tree)
        import base64
        encoded_img = base64.encodebytes(img_byte_arr).decode('ascii')
        response_formats["tree_image"] = encoded_img

    if "tree_ascii" in request_formats:
        print("add tree_ascii to output")
        from nltk.treeprettyprinter import TreePrettyPrinter
        ascii_str = str(TreePrettyPrinter(tree)).rstrip()
        response_formats["tree_ascii"] = ascii_str

    if "bracket_diagram" in request_formats:
        print("add labelled_bracket to output")
        # Labelled bracketing: swap parentheses for square brackets and
        # collapse whitespace runs into single spaces.
        bracket_diagram = str(tree)
        open_b, close_b = "[]"
        bracket_diagram = bracket_diagram.replace("(", open_b).replace(
            ")", close_b)
        bracket_diagram = " ".join(bracket_diagram.split())
        response_formats["bracket_diagram"] = bracket_diagram

    if "tree_str" in request_formats:
        print("add tree_str to output")
        tree_str = " ".join(str(tree).split())
        response_formats["tree_str"] = tree_str

    res = {
        'sentence': sentence,
        'parser': parser,
        'response_formats': response_formats
    }

    return res
예제 #3
0
 def setlang(self, lang, kind):
     """Configure the Stanford parser environment and load a parser model.

     lang -- one of "English", "Chinese", "French", "German", "Spanish"
     kind -- "con" loads a constituency parser; "de" and "neu" are
             recognized but not implemented (no-ops), matching the original.

     Unknown lang/kind combinations silently do nothing.
     """
     os.environ[
         'STANFORD_PARSER'] = '/home/bear/Downloads/parser/stanford-parser-full-2015-12-09/'
     os.environ['JAVA_HOME'] = '/usr/lib/jvm/jdk1.8.0_91/jre/jre/bin/'
     os.environ[
         'STANFORD_MODELS'] = '/home/bear/Downloads/parser/stanford-parser-full-2015-12-09/stanford-parser-3.6.0-models.jar'

     # Fix: the original repeated five structurally identical if/elif blocks,
     # one per language; a model-name table expresses the same mapping once.
     models = {
         "English": "englishPCFG.ser.gz",
         "Chinese": "chineseFactored.ser.gz",
         "French": "frenchFactored.ser.gz",
         "German": "germanPCFG.ser.gz",
         "Spanish": "spanishPCFG.ser.gz",
     }
     if kind == "con" and lang in models:
         self.parser = stanford.StanfordParser(
             model_path="/home/bear/Downloads/parser/stanford-parser-full-2015-12-09/"
             "stanford-parser-3.6.0-models/edu/stanford/nlp/models/lexparser/"
             + models[lang])
예제 #4
0
def init_parser(parseType='C'):
    # initializes the parser
    #
    # parseType == 'C' is a constituency tree (via stanford PCFG)
    # parseType == 'D' is a dependency tree (english_SD)
    os.environ['STANFORD_PARSER'] = PARSER_LOC
    os.environ['STANFORD_MODELS'] = MODEL_LOC
    if parseType == 'C':
        parser = stanford.StanfordParser(model_path=CONST_LOC)
    elif parseType == 'D':
        parser = stanford.StanfordParser(model_path=CONST_LOC)
    else:
        print 'Unrecognized parser type request'
        return
    return parser
예제 #5
0
def compress_sent_tree(sentence):
	'''Compress a sentence (given as a string of words) and return the
	compressed sentence (also a string of words).

	Falls back to returning the input unchanged when parsing or
	compression fails.
	'''
	try:
		# Building the parser inside the try means a missing parser/model
		# also falls back to the uncompressed sentence instead of crashing.
		parser = stanford.StanfordParser(
			model_path=model_path,
			path_to_models_jar=parser_jar
		)
		# Take the first parse and turn it into an nltk Tree.
		tree1 = None
		for s in parser.raw_parse(sentence):
			tree1 = tree.Tree.fromstring(str(s))
			break

		positions, position_flags = get_position_and_flags(tree1)
		compressed = realize(tree1, positions, position_flags)
	except Exception:
		# Bug fix: was a bare `except:` (which also swallowed
		# KeyboardInterrupt/SystemExit) and the message read "duo to".
		print("\nno compression performed due to an error in parse. sentence: {0} \n".format(sentence))
		compressed = sentence

	return compressed
def get_postags(annotations, sentence, posrange):
    """For each annotation span, return the POS tags in a window extended by
    ``posrange`` positions on both sides ("S" pads before the start of the
    sentence, "E" pads past its end).

    Parses are memoized per sentence in the module-level dict
    ``previous_sentence_tagged`` so the Stanford parser runs once per
    distinct sentence.
    """
    global previous_sentence_tagged
    cache_key = str(sentence)
    if cache_key in previous_sentence_tagged:
        postagsandwords = previous_sentence_tagged[cache_key]
    else:
        # Point NLTK at the bundled Stanford jars (Windows layout).
        os.environ['STANFORD_PARSER'] = os.getcwd() +'\\jars\\stanford-parser.jar'
        os.environ['STANFORD_MODELS'] = os.getcwd() +'\\jars\\stanford-parser-3.5.2-models.jar'
        os.environ['JAVAHOME'] ="C:\\Program Files (x86)\\Java\\jre1.8.0_73\\bin\\java.exe"
        parser = stanford.StanfordParser(model_path=os.getcwd() +"\\jars\\englishPCFG.ser.gz")
        postagsandwords = parser.parse_one([sentence]).pos()
        previous_sentence_tagged[cache_key] = postagsandwords

    postags = [pair[1] for pair in postagsandwords]

    annotations_postags = []
    for annotation in annotations:
        lo = int(annotation[0]) - posrange
        hi = int(annotation[1]) + posrange
        window = ["S" if i < 0 else "E" if i >= len(postags) else postags[i]
                  for i in range(lo, hi + 1)]
        annotations_postags.append(window)

    return annotations_postags
예제 #7
0
def main(argv):

    debug = False

    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
    except getopt.GetoptError as e:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ["-h", "help"]:
            usage()
            sys.exit(2)
        if opt in ["-d", "debug"]:
            debug = True

    parser = stanford.StanfordParser()

    line = raw_input("Enter line: ")

    while line != 'stop':
        sent = list(parser.raw_parse(line))[0]
        if debug:
            print sent  # print parse tree
        if sent[0].label() == "SBARQ":
            print answer(sent)
        else:
            try:
                describe(sent)
            except ValueError as e:
                print "Error describing sentence. " + e
            if debug:
                print smap  # print semantic map
        line = raw_input("Enter line: ")
예제 #8
0
def find_head(target):
	"""For every sentence in every group of *target*, collect the rightmost
	leaf of the last NP subtree ('null' when the sentence is too long, has
	no NP, or the candidate word is not alphabetic).

	Side effect: pickles the nested result list to 'heads.dat'.
	Returns the nested list of heads.
	"""
	parser = stanford.StanfordParser(model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
	java_options='-mx4g')
	heads = []
	for sentences in target:
		head = []
		for sentence in sentences:
			found = None
			# Very long sentences are skipped to keep parsing tractable.
			if len(sentence.split()) < 200:
				parsing_tree = parser.raw_parse(sentence)
				# Bug fix: `leaves` is now reset per sentence; previously it
				# leaked from the prior sentence (or was unbound entirely)
				# when a parse contained no NP subtree.
				leaves = None
				for subtree in list(parsing_tree)[0].subtrees():
					if subtree.label() == 'NP':
						leaves = subtree.leaves()
				if leaves:
					rightmost_np = leaves[-1]
					if rightmost_np.isalpha():
						found = rightmost_np
			head.append(found if found is not None else 'null')
		heads.append(head)
		print('..')
	# `with` guarantees the file is closed even if pickling fails.
	with open('heads.dat', 'wb') as fp:
		pickle.dump(heads, fp, True)
	return heads
예제 #9
0
 def __init__(self):
     """Point NLTK at the local Stanford parser jars and load the Chinese
     PCFG model."""
     os.environ['STANFORD_PARSER'] = '/Volumes/Transcend/stanford/stanford-parser.jar'
     os.environ['STANFORD_MODELS'] = '/Volumes/Transcend/stanford/stanford-parser-3.9.1-models.jar'
     self.parser = stanford.StanfordParser(model_path="edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz")
예제 #10
0
 def __init__(self, index_name, ip):
     """Create a Chinese Stanford parser and an Elasticsearch client.

     index_name -- name of the Elasticsearch index to operate on
     ip         -- Elasticsearch host address
     """
     self.index_name = index_name
     # Connect without username/password credentials.
     self.es = Elasticsearch([ip])
     os.environ['STANFORD_PARSER'] = '/Volumes/Transcend/stanford/stanford-parser.jar'
     os.environ['STANFORD_MODELS'] = '/Volumes/Transcend/stanford/stanford-parser-3.9.1-models.jar'
     self.parser = stanford.StanfordParser(model_path="edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz")
예제 #11
0
    def __init__(self, alg, lang):
        """Read settings.conf, configure the CoreNLP environment and import
        the PCC/CoNLL parser modules from their configured file locations.

        alg  -- algorithm identifier (kept for the caller's interface)
        lang -- 'de' or 'en'; anything else aborts with an error message
        """
        # Settings file lives next to this script.
        config = configparser.ConfigParser()
        script_location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
        config.read(os.path.join(script_location, 'settings.conf'))
        os.environ['JAVAHOME'] = config.get('JAVA', 'JAVAHOME')
        os.environ['STANFORD_PARSER'] = config.get('CORENLP', 'STANFORD_PARSER')
        os.environ['STANFORD_MODELS'] = config.get('CORENLP', 'STANFORD_MODELS')

        # Pick the lexicalized-parser model for the requested language.
        model_keys = {'de': 'LEXPARSER_DE', 'en': 'LEXPARSER_EN'}
        if lang not in model_keys:
            sys.stderr.write("ERROR: Language '%s' not supported. Please use one of the supported languages.\n" % lang)
            sys.exit(1)
        self.lexParser = stanford.StanfordParser(model_path=config.get('CORENLP', model_keys[lang]))

        # Import the PCC and CoNLL parser modules by explicit file location.
        pcc_spec = importlib.util.spec_from_file_location("PCCParser", config.get('MISC', 'PCCPARSER'))
        self.PCCParser = importlib.util.module_from_spec(pcc_spec)
        pcc_spec.loader.exec_module(self.PCCParser)

        conll_spec = importlib.util.spec_from_file_location('CONLLPARSER', config.get('MISC', 'CONLLPARSER'))
        self.CONLLParser = importlib.util.module_from_spec(conll_spec)
        conll_spec.loader.exec_module(self.CONLLParser)
예제 #12
0
 def __init__(self):
     """Set up a Stanford parser, load the SQuAD corpus from its pickled
     form, and define the POS tag groups used downstream."""
     self.parser = stanford.StanfordParser()
     # SQuAD data comes from the pre-built pickle.
     self.squad = SQuADutil()
     self.squad.get_data(mode="pickle")
     # Tokens to ignore, plus noun and verb POS tag groups.
     self.neglist = ['``', '\'\'', ',', '.', '?']
     self.NN = ["NN", "NNS", "NNP", "NNPS"]
     self.V = ["VB", "VBG", "VBN", "VBP", "VBZ"]
예제 #13
0
def compress_sent_maxent(sentence):
	"""Compress *sentence* using beam search over per-node keep/drop flags.

	Returns the input unchanged when parsing or compression fails.
	"""
	try:
		parser = stanford.StanfordParser(
			model_path=model_path,
			path_to_models_jar=parser_jar
		)
		# Take the first parse and turn it into an nltk Tree.
		tree1 = None
		for s in parser.raw_parse(sentence):
			tree1 = tree.Tree.fromstring(str(s))
			break

		tokens = tree1.leaves()
		N = 5  # beam search width
		flags = beam_search(tree1, tokens, N)[N - 1]

		# Map each tree position to its keep/drop flag.
		positions = tree1.treepositions()
		position_flags = {pos: flags[i] for i, pos in enumerate(positions)}

		compressed = realize(tree1, positions, position_flags)
	except Exception:
		# Bug fix: was a bare `except:`, which also swallowed
		# KeyboardInterrupt / SystemExit.
		compressed = sentence

	return compressed
예제 #14
0
def myMain():
    """Parse a fixed set of caption-style test sentences and run myFilter
    over every resulting parse tree."""
    parser = stanford.StanfordParser(path_to_jar=path_to_jar,
                                     path_to_models_jar=path_to_models_jar,
                                     model_path=model_path)

    test_sentences = (
        "A man in white shirt on bicycle with a dog riding in the back.",
        "several young students working at a desk with multiple computers",
        "a person running a bike on a road with trees in the background",
        "A white tank with an book on it in classroom.",
        "A young woman standing in a kitchen eats a plate of vegetables.",
        "A man in a red shirt and a red hat is on a motorcycle on a hill side",
        "Stone keepers on the ground is holding a gem of time.",
        "Two chefs in a restaurant kitchen preparing food. ",
        "A commercial dish washing station with a toilet in it.",
        "A geoup of people on bicycles coming down a street.",
        "a bathroom with a toilet between a sink and a shower",
        "Elephant walking through the middle of the road in front of a car. ",
        "A horse drawn carriage among several other motor vehicles on a road.",
        "A car is parked near a parking meter.",
        "a row of bikes and mopeds is parked along the street",
    )

    # raw_parse_sents yields one iterator of candidate trees per sentence.
    for parsed_sentence in parser.raw_parse_sents(test_sentences):
        for parse_tree in parsed_sentence:
            myFilter(parse_tree)
예제 #15
0
 def __init__(self, productions_filename=None):
     """Set up the evaluator's Stanford parser and optionally load a
     productions file.

     productions_filename -- optional path to a productions file; when
     omitted (or None) no grammar is loaded and self.grammar is None.
     """
     super(QuestionEvaluator, self).__init__()
     self.parser = stanford.StanfordParser(encoding='utf8')
     # Idiom fix: compare against None with `is not`, not `!=`.
     if productions_filename is not None:
         self.read_productions(productions_filename)
     else:
         self.grammar = None
예제 #16
0
 def clear_data(self):
     """Reset the parser and every extraction slot ahead of a new sentence."""
     self.first_NP = ''
     self.first_VP = ''
     self.parse_tree = None
     self.parser = stanford.StanfordParser(model_path=STANFORD_MODEL_PATH)
     # Fresh subject/predicate/object slots; the predicate is restricted to
     # VB* part-of-speech tags.
     self.subject = RDF_Triple.RDF_SOP('subject')
     self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
     self.Object = RDF_Triple.RDF_SOP('object')
예제 #17
0
def init_parser():
    print 'Initializing parser...'
    # initializes the parser
    os.environ['STANFORD_PARSER'] = PARSER_LOC
    os.environ['STANFORD_MODELS'] = MODEL_LOC
    parser = stanford.StanfordParser(model_path=PCFG_LOC)
    print 'Complete'
    return parser
    def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
        """Record the corpus directory layout and load a Stanford parser
        when requested.

        datasets_path -- root folder holding the corpus and processed data
        corpus_name   -- corpus directory name under <datasets_path>
        parse_type    -- only 'parse' builds a parser here
        lang          -- 'english' (default) or 'german'
        """
        self.datasets_path = datasets_path
        self.corpus_name = corpus_name
        self.corpus_path = path.join(datasets_path, corpus_name)

        # Fixed corpus layout: docs, topics and (summary) model folders.
        self.docs_path = path.join(self.corpus_path, "docs")
        self.topics_file = path.join(self.corpus_path, "topics.xml")
        self.models_path = path.join(self.corpus_path, "models")
        self.smodels_path = path.join(self.corpus_path, "smodels")

        self.jar_path = JAR_PATH
        os.environ['CLASSPATH'] = self.jar_path
        self.cleaned_path = path.join(datasets_path, "processed_data")

        if parse_type == 'parse':
            if lang == 'english':
                self.parser = stanford.StanfordParser(
                    model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
            elif lang == 'german':
                self.parser = stanford.StanfordParser(
                    model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
 def _prepare_parser(self):
     """Build and return a Stanford parser using ``self.model_path``.

     - Args: none
     - Returns:
         parser: Stanford parser instance
     """
     return stanford.StanfordParser(model_path=self.model_path)
예제 #20
0
def test_parser():
    import nltk
    from nltk.parse import stanford
    from nltk.parse.stanford import StanfordParser
    os.environ[
        'STANFORD_PARSER'] = 'F:/eclipse_doctor/KnowledgeGraph/stanford-parser/stanford-parser.jar'
    os.environ[
        'STANFORD_MODELS'] = 'F:/eclipse_doctor/KnowledgeGraph/stanford-parser/stanford-parser-3.7.0-models.jar'
    java_path = "C:/ProgramData/Oracle/Java/javapath"
    os.environ['JAVAHOME'] = java_path

    start = datetime.now()
    print start
    parser = stanford.StanfordParser(
        model_path=
        "F:/eclipse_doctor/KnowledgeGraph/stanford-parser/englishPCFG.ser.gz")
    end = datetime.now()
    print end
    print "cost time: " + str((end - start).microseconds)

    sent = 'angulated abutment is an abutment whose body is not parallel to the long axis of the implant. It is utilized when the implant is at a different inclination in relation to the proposed prosthesis.'
    start = datetime.now()
    print start
    trees = parser.parse(sent.split())
    end = datetime.now()
    print end
    print "cost time: " + str((end - start).microseconds)
    print 'len(trees)', len(list(trees))

    path_project = os.path.abspath(
        os.path.join(os.getcwd(), os.pardir, os.pardir))
    path_data = path_project + os.sep + "input" + os.sep + "items_tagged_modified.json"
    data = json.load(codecs.open(path_data, encoding='UTF-8'))
    start_all = datetime.now()
    cnt = 0
    trees_all = []
    for item in data:
        pos2definition = item["pos2definition"]
        for pos2def in pos2definition:
            definition = pos2def["definition"]
            definition_pure = re.sub(r'\([\s\S]*?\)', "", definition)
            text = nltk.word_tokenize(definition_pure)
            sents_pos_period = cut_list(text, ['.'])
            for sent_list in sents_pos_period:
                cnt += 1
                start = datetime.now()
                #                 print start
                trees = parser.parse(' '.join(sent_list).split())
                trees_all.append(trees)
                end = datetime.now()
#                 print end
#                 print "cost time: "+str((end - start).microseconds)
    end_all = datetime.now()
    print end_all
    sum_time = (end_all - start_all).seconds
    sum_time_mic = (end_all - start_all).microseconds
    avg_time = (end_all - start_all).seconds * 1.0 / cnt
    print sum_time, sum_time_mic, avg_time, cnt
예제 #21
0
파일: test.py 프로젝트: INTFREE/bop
    def __init__(self):
        """Set up all NLP resources: a Stanford constituency parser for
        Chinese, the pyltp pipeline (dependency parser, word segmenter,
        POS tagger, NER), a question-word lexicon and a stop-word list.
        """
        os.environ['STANFORD_PARSER'] = STANFORD_PARSER_PATH
        os.environ['STANFORD_MODELS'] = STANFORD_MODELS_PATH
        os.environ['JAVAHOME'] = JAVA_HOME
        stanford_model_path = CHINESE_MODEL_PATH
        self.s_parser = stanford.StanfordParser(model_path=stanford_model_path)

        par_model_path = os.path.join(
            LTP_DATA_DIR, 'parser.model')  # dependency parser model path (file `parser.model`)

        from pyltp import Parser
        self.parser = Parser()  # create instance
        self.parser.load(par_model_path)  # load model

        cws_model_path = os.path.join(LTP_DATA_DIR,
                                      'cws.model')  # word segmentation model path (file `cws.model`)

        from pyltp import Segmentor
        self.segmentor = Segmentor()  # create instance
        self.segmentor.load(cws_model_path)  # load model

        pos_model_path = os.path.join(LTP_DATA_DIR,
                                      'pos.model')  # POS tagging model path (file `pos.model`)

        from pyltp import Postagger
        self.postagger = Postagger()  # create instance
        self.postagger.load(pos_model_path)  # load model

        ner_model_path = os.path.join(
            LTP_DATA_DIR, 'ner.model')  # NER model path (file `ner.model`; original comment wrongly said `pos.model`)

        from pyltp import NamedEntityRecognizer
        self.recognizer = NamedEntityRecognizer()  # create instance
        self.recognizer.load(ner_model_path)  # load model

        # Question-word lexicon: each key classifies a question type
        # (person, time, amount, place, reason, yes/no judgement, other,
        # definition); values are the Chinese cue words.
        q_words = {
            'q1_person': ['谁', '那个', '哪个'],
            'q1_time': ['那年', '时间', '哪年', '何时', '多久', '时候', '年'],
            'q1_amount': ['多', '几', '多少', '第几'],
            'q1_place': ['哪儿', '哪家', '哪里人', '哪里', '那家', '那里人', '那里'],
            'q1_result': ['怎么', '为什么', '为何', '如何', '何'],
            'q1_judge': ['是否', '还是', '吗'],
            'q0_other': ['哪些', '那些', '干什么'],
            'q0_definition': ['什么样', '什么', '怎么样', '怎样'],
        }
        self.question_words = []
        self.word2key = {}

        # Flatten the lexicon into a cue-word list and a word -> type-key map.
        for k, v in q_words.items():
            self.question_words += v
            for _v in v:
                self.word2key[_v] = k

        # Stop words, one per line.
        self.stop_words = set()
        with open('../data/all-stop-word.txt') as f_stop:
            for i in f_stop.readlines():
                self.stop_words.add(i.strip())
        self.articles = []
예제 #22
0
 def init_data(self):
     """Parse self.sentence with the Stanford parser and reset the RDF
     extraction slots (subject/predicate/object)."""
     self.parser = stanford.StanfordParser(model_path=r"/home/mael/MASTER_2/TEXTE/RDF-Triple-API-master/stanford-parser-full-2015-01-30/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
     self.NP = ''
     self.VP = ''
     # Take the first tree from the Stanford parser.
     # Bug fix: raw_parse returns an iterator in modern NLTK, which does not
     # support indexing; next(iter(...)) works for both lists and iterators.
     self.stanford_tree = next(iter(self.parser.raw_parse(self.sentence)))
     self.subject = Extractor.RDF_ELEMENT('subject')
     self.predicate = Extractor.RDF_ELEMENT('predicate')
     self.Object = Extractor.RDF_ELEMENT('object')
    def __init__(self):
        """Initialize the SVO extraction resources: POS tag groups, a
        Stanford parser and the punkt English sentence tokenizer."""
        # POS tag groups consulted while walking parse trees.
        self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
        self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
        self.adjective_types = ["JJ", "JJR", "JJS"]
        # Scratch slot used during predicate extraction.
        self.pred_verb_phrase_siblings = None
        self.parser = stanford.StanfordParser()
        self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
예제 #24
0
def mainON(field2word, subset):
    ''' process data, from .qu, .lo, and .fi
        to .ta, .lox, .qux
        and .ficorr, .vacorr
    '''

    # Subset names look like "<domain>_<split>"; the domain selects the
    # source sub-folder.
    sub_folder = subset.split('_')[0]
    data_root = FLAGS.data_root
    # Point NLTK at the Stanford parser jars configured via FLAGS.
    os.environ['STANFORD_PARSER'] = FLAGS.stanford_parser
    os.environ['STANFORD_MODELS'] = FLAGS.stanford_models
    parser = stanford.StanfordParser(model_path=FLAGS.stanford_model_path)
    # Schema string: all field names joined by spaces.
    schema = ' '.join(field2word.keys())

    if not path.isdir(join(data_root, 'overnight_generated')):
        os.makedirs(join(data_root, 'overnight_generated'))

    # One output file per generated artifact: tagged, logical form,
    # query, field correspondences, value correspondences.
    (f_ta, f_lox, f_qux, f_ficorr, f_vacorr) = [
        open(
            join(data_root, 'overnight_generated', '%s.%s' % (subset, suffix)),
            'w') for suffix in ['ta', 'lox', 'qux', 'ficorr', 'vacorr']
    ]

    # Read queries (.qu) and logical forms (.lon) in lockstep, one example
    # per line, and tag each pair with the tree-based tagger.
    with open(data_root + 'overnight_source/%s/%s.qu' %
              (sub_folder, subset)) as f_qu, open(
                  data_root + 'overnight_source/%s/%s.lon' %
                  (sub_folder, subset)) as f_lo:
        query, logic = f_qu.readline(), f_lo.readline()
        idx = 0
        while query and logic:
            idx += 1
            print '### example: %d ###' % idx
            print query
            print logic
            tagged2, field_corr, value_corr, newquery, newlogical = codebase.tagger.sentTagging_treeON3(
                parser, field2word, query, schema, logic)
            print field_corr
            print value_corr
            print tagged2
            print newquery
            print newlogical
            print '\n'
            # Write each artifact to its corresponding output file.
            f_qux.write(newquery + '\n')
            f_lox.write(newlogical + '\n')
            f_ficorr.write(field_corr + '\n')
            f_vacorr.write(value_corr + '\n')
            f_ta.write(tagged2 + '\n')
            query, logic = f_qu.readline(), f_lo.readline()

    f_ta.close()
    f_lox.close()
    f_qux.close()
    f_vacorr.close()
    f_ficorr.close()
    return
예제 #25
0
def get_parses(sentences):
    """Parse raw sentences with the Stanford parser and return a flat list
    of parse trees (one tree taken per candidate group)."""
    os.environ['CLASSPATH'] = dir+'stanford-parser'
    os.environ['STANFORD_PARSER'] = dir+'stanford-parser/stanford-parser.jar'
    os.environ['STANFORD_MODELS'] = dir+'stanford-parser/stanford-parser-3.6.0-models.jar'
    parser = stanford.StanfordParser(model_path=dir+"stanford-parser/models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    # This call can take a while when the input is large.
    parsed = parser.raw_parse_sents(sentences)
    # Flatten: raw_parse_sents yields an iterator of trees per sentence.
    return [t for group in parsed for t in group]
예제 #26
0
 def clear_data(self):
     """Reset the parser and every extraction slot ahead of a new sentence."""
     self.first_NP = ''
     self.first_VP = ''
     self.parse_tree = None
     self.parser = stanford.StanfordParser(
         model_path=
         r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
     )
     # Fresh subject/predicate/object slots; the predicate is restricted to
     # VB* part-of-speech tags.
     self.subject = RDF_Triple.RDF_SOP('subject')
     self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
     self.Object = RDF_Triple.RDF_SOP('object')
예제 #27
0
def stanford_parser_tree(sentences):
    """Run the Stanford lexicalized parser over *sentences* and return the
    resulting parse iterator.

    NOTE(review): raw_parse_sents expects an iterable of sentence strings;
    the original's extra parentheses did NOT make a tuple, so a single
    string argument would be iterated character by character -- confirm
    what callers pass.
    """
    os.environ[
        'STANFORD_PARSER'] = 'D:/Facultate anul 3/AI/Projectlibraries/stanford-parser-full-2016-10-31/stanford-parser.jar'
    os.environ[
        'STANFORD_MODELS'] = 'D:/Facultate anul 3/AI/Projectlibraries/stanford-parser-full-2016-10-31/stanford-parser-3.7.0-models.jar'
    parser = stanford.StanfordParser(
        model_path=
        "E:/FII/3/IA/stanford-parser-and-models/jars/englishPCFG.ser.gz")
    return parser.raw_parse_sents(sentences)
예제 #28
0
 def get_parser(self):
     """Configure the Stanford jar locations and return an English PCFG
     parser.

     NOTE(review): `dir` shadows the builtin here; presumably a
     module-level path constant -- confirm where it is defined.
     """
     os.environ['CLASSPATH'] = dir + 'stanford-parser'
     os.environ['STANFORD_PARSER'] = dir + 'stanford-parser/stanford-parser.jar'
     os.environ['STANFORD_MODELS'] = dir + 'stanford-parser/stanford-parser-3.6.0-models.jar'
     return stanford.StanfordParser(
         model_path=dir +
         "stanford-parser/models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
     )
예제 #29
0
    def __init__(self, stanford_parser_path):
        """Append the Stanford parser path to CLASSPATH and load the
        English PCFG model from that location."""
        path_sep = classpath_separator[system()]

        # Seed CLASSPATH with the current directory when it is not set yet.
        if 'CLASSPATH' not in os.environ:
            os.environ['CLASSPATH'] = ".{}".format(path_sep)
        os.environ['CLASSPATH'] += "{}{}".format(stanford_parser_path, path_sep)
        print(os.environ['CLASSPATH'])

        self.parser = stanford.StanfordParser(
            model_path="{}/englishPCFG.ser.gz".format(stanford_parser_path))
예제 #30
0
    def __init__(self):
        """Read lexparser settings from config.ini, export them to the
        environment and build a German Stanford parser."""
        self.config = configparser.ConfigParser()
        self.config.read('config.ini')
        # All relevant keys live in the [lexparser] section.
        lex_cfg = self.config['lexparser']
        os.environ['JAVAHOME'] = lex_cfg['javahome']
        os.environ['STANFORD_PARSER'] = lex_cfg['stanfordParser']
        os.environ['STANFORD_MODELS'] = lex_cfg['stanfordModels']
        os.environ['CLASSPATH'] = lex_cfg['path']
        self.lexParser = stanford.StanfordParser(model_path=lex_cfg['germanModel'])