def sdfprocess(rvdata, partidx):
    os.environ["MALT_PARSER"] = "/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8"
    parser = MaltParser(mco='engmalt.poly-1.7',
                        working_dir='/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8',
                        additional_java_args=['-Xmx5000m'])
    sdfdata = []
    cnn = 1
    # demo()
    print parser.raw_parse("I am a student.")
    for eg in rvdata:
        if cnn % 100 == 0:
            print "%f%% of document %d finished" % (cnn * 100 * 1.0 / len(rvdata), partidx + 1)
        cmt = eg[3].decode('utf-8')  # 3 is the idx of the comment
        sentences = nltk.sent_tokenize(cmt)
        sdfparsed = [parser.raw_parse(sentence) for sentence in sentences]
        sdfdata.append(eg[:3] + [sdfparsed])
        # print cnn
        print sdfparsed
        # print sdfdata
        cnn += 1
        if cnn > 5:
            break
    return sdfdata
def dep_parse(self, sentence='every cat leaves'):
    # Lazy-initialize the depparser
    if self.depparser is None:
        self.depparser = MaltParser(tagger=self.get_pos_tagger())
    if not self.depparser._trained:
        self.train_depparser()
    return [self.depparser.parse(sentence, verbose=self.verbose)]
def parse_sents(self, sents):
    os.environ['MALT_PARSER'] = config['MALT_WORKING_DIR'] + '/malt.jar'
    parser = NltkMaltParser(working_dir=config['MALT_WORKING_DIR'],
                            mco=config['MALT_MCO'],
                            additional_java_args=config['MALT_JAVA_ARGS'],
                            tagger=self.tagger)
    graphs = [list(graph)[0] for graph in parser.parse_sents(sents)]
    # Sometimes, there is an empty graph at the end of the list. Delete it.
    if len(graphs) > 0 and len(graphs[-1].nodes) == 1:
        del graphs[-1]
    return graphs
class Parser(object):
    '''Write something here.'''

    nouns = defaultdict(set)
    verbs = defaultdict(set)

    def __init__(self, all_modules, malt_working_dir=None, malt_mco=None):
        '''...'''
        if not malt_working_dir:
            import os
            malt_working_dir = os.environ['MALTPARSERHOME']
        if not malt_mco:
            malt_mco = 'engmalt.linear-1.7'
        self._parser = MaltParser(working_dir=malt_working_dir, mco=malt_mco,
                                  additional_java_args=['-Xmx512m'])
        for module in all_modules:
            for noun in module.nouns:
                self.nouns[noun].add(module)
            for verb in module.verbs:
                self.verbs[verb].add(module)

    def _is_verb(self, word):
        return word in self.verbs

    def _is_noun(self, word):
        return word in self.nouns

    def _check_command(self, noun, verb):
        if not noun or not verb:
            return False
        print 'Noun:', noun
        print 'Verb:', verb
        return self.nouns[noun] & self.verbs[verb]

    def parse(self, command):
        '''...'''
        words = word_tokenize(command)
        pos = pos_tag(words)
        graph = self._parser.tagged_parse(pos)
        tree = graph.tree()
        print "Parse Tree: ", tree
        c_noun = None
        c_verb = None
        for subtree in tree.subtrees():
            words = [x for x in subtree if not isinstance(x, Tree)] + [subtree.node]
            for x in words:
                # TODO: Use the tagged sentence so that we can resolve this better
                if self._is_verb(x):
                    c_verb = x
                elif self._is_noun(x):
                    c_noun = x
            command_modules = self._check_command(c_noun, c_verb)
            if command_modules:
                yield Command(command, c_noun, c_verb,
                              [x for x in words if x not in (c_noun, c_verb)])
                c_noun = None
def setup_module():
    import pytest

    from nltk.parse.malt import MaltParser

    try:
        depparser = MaltParser("maltparser-1.7.2")
    except LookupError as e:
        pytest.skip("MaltParser is not available")
def setup_module(module):
    from nose import SkipTest
    from nltk.parse.malt import MaltParser

    try:
        depparser = MaltParser("maltparser-1.7.2")
    except LookupError:
        raise SkipTest("MaltParser is not available")
def demo():
    discourse_demo()

    tagger = RegexpTagger([('^(chases|runs)$', 'VB'),
                           ('^(a)$', 'ex_quant'),
                           ('^(every)$', 'univ_quant'),
                           ('^(dog|boy)$', 'NN'),
                           ('^(he)$', 'PRP')])
    depparser = MaltParser(tagger=tagger)
    drt_discourse_demo(DrtGlueReadingCommand(remove_duplicates=False,
                                             depparser=depparser))
def mltprocess(tp, path, filenamels, docid):
    # additional_java_args must be a list of strings, not a single string
    parser = MaltParser(working_dir='/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8/maltparser-1.8.jar',
                        mco='engmalt.poly-1.7.mco',
                        additional_java_args=['-mx5000m'])
    sdfdata = []
    for i in range(len(filenamels)):
        if (i + 1) % 100 == 0:
            print "%f%% of document %d of %s finished" % ((i + 1) * 100 * 1.0 / len(filenamels), docid, tp)
        filename = filenamels[i]
        h = open(path + filename, 'r')
        lines = h.readlines()
        h.close()
        headraw, bodyraw = preprocess(lines[0]), preprocess(lines[1])
        sentences = [headraw] + nltk.sent_tokenize(bodyraw)
        sdfparsed = [parser.raw_parse(sentence) for sentence in sentences]
        sdfdata.append(sdfparsed)
        # print sdfparsed
        # print sdfdata
        # if i > 5: break
    return sdfdata
def setup_module():
    import pytest

    from nltk.parse.malt import MaltParser

    try:
        depparser = MaltParser()
    except (AssertionError, LookupError) as e:
        pytest.skip("MaltParser is not available")
def _init_glue(self):
    tagger = RegexpTagger([
        ('^(David|Mary|John)$', 'NNP'),
        ('^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', 'VB'),
        ('^(go|order|vanish|find|approach)$', 'VB'),
        ('^(a)$', 'ex_quant'),
        ('^(every)$', 'univ_quant'),
        ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
        ('^(big|gray|former)$', 'JJ'),
        ('^(him|himself)$', 'PRP')
    ])
    depparser = MaltParser(tagger=tagger)
    self._glue = DrtGlue(depparser=depparser, remove_duplicates=False)
def demo():
    discourse_demo()

    tagger = RegexpTagger([
        ("^(chases|runs)$", "VB"),
        ("^(a)$", "ex_quant"),
        ("^(every)$", "univ_quant"),
        ("^(dog|boy)$", "NN"),
        ("^(he)$", "PRP"),
    ])
    depparser = MaltParser(tagger=tagger)
    drt_discourse_demo(
        DrtGlueReadingCommand(remove_duplicates=False, depparser=depparser))
class MaltParser(DependencyParserWrapper):
    # currently this can only be run from the malt parser directory
    # TODO: make more general/easy to run
    def __init__(self, version="maltparser-1.9.2", model="engmalt.linear-1.7.mco"):
        super().__init__()
        root = os.getcwd()
        version_path = os.path.join(root, "schemata", "parse", "maltparser", version)
        model_path = os.path.join(root, "schemata", "parse", "maltparser", model)
        self.base = MP(version_path, model_path)

    def get_spans(self, sent):
        dparse = self.base.parse_one(sent.split())
        heads = [node['head'] for _, node in sorted(dparse.nodes.items())][1:]
        tree = DependencyParserWrapper.head_to_tree(heads)
        non_singletons = DependencyParserWrapper.compute_spans(tree)
        singletons = [(n, n + 1) for n in range(len(heads))]
        return set(non_singletons) | set(singletons)
def __init__(self, all_modules, malt_working_dir=None, malt_mco=None):
    '''...'''
    if not malt_working_dir:
        import os
        malt_working_dir = os.environ['MALTPARSERHOME']
    if not malt_mco:
        malt_mco = 'engmalt.linear-1.7'
    self._parser = MaltParser(working_dir=malt_working_dir, mco=malt_mco,
                              additional_java_args=['-Xmx512m'])
    for module in all_modules:
        for noun in module.nouns:
            self.nouns[noun].add(module)
        for verb in module.verbs:
            self.verbs[verb].add(module)
def demo(show_example=-1):
    examples = ['David sees Mary',
                'David eats a sandwich',
                'every man chases a dog',
                'every man believes a dog sleeps',
                'John gives David a sandwich',
                'John chases himself',
                # 'John persuades David to order a pizza',
                # 'John tries to go',
                # 'John tries to find a unicorn',
                # 'John seems to vanish',
                # 'a unicorn seems to approach',
                'every big cat leaves',
                'every gray cat leaves',
                'every big gray cat leaves',
                'a former senator leaves']

    print '============== DEMO =============='

    tagger = RegexpTagger(
        [('^(David|Mary|John)$', 'NNP'),
         ('^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', 'VB'),
         ('^(go|order|vanish|find|approach)$', 'VB'),
         ('^(a)$', 'ex_quant'),
         ('^(every)$', 'univ_quant'),
         ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
         ('^(big|gray|former)$', 'JJ'),
         ('^(him|himself)$', 'PRP')
         ])
    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for (i, sentence) in enumerate(examples):
        if i == show_example or show_example == -1:
            print '[[[Example %s]]] %s' % (i, sentence)
            for reading in glue.parse_to_meaning(sentence):
                print reading.simplify()
            print ''
#!/usr/bin/env python
from nltk.parse.malt import MaltParser

parser = MaltParser('maltparser-1.8.1', 'espmalt-1.0.mco')
txt = "This is a test sentence"
parser.train_from_file('Tibidabo_Treebank.txt')
parser.raw_parse(txt)
import sys

from nltk.parse.malt import MaltParser

PATH_TO_MALTPARSER = "maltparser-1.9.2"
PATH_TO_MODEL = "kaist-conll.mco"


def print_usage():
    print("usage: $ python3 parser.py <input text>")


if __name__ == '__main__':
    argv = sys.argv[1:]
    argc = len(sys.argv)
    if argc != 2:
        print_usage()
        sys.exit()

    user_input = argv[0]
    tokens = user_input.split()

    mp = MaltParser(PATH_TO_MALTPARSER, PATH_TO_MODEL)
    graph = mp.parse_one(tokens).tree()
    print(graph)
def get_feature_names(pairs):
    feature_names = []
    c_vect = CountVectorizer(min_df=5, ngram_range=(1, 3), tokenizer=word_tokenize)
    pos_vect = CountVectorizer(tokenizer=iterate_pos, ngram_range=(1, 3), lowercase=False)
    mp = MaltParser("/home/lena/opt/maltparser-1.9.2", "/home/lena/opt/russian.mco")
    model_d = Doc2Vec.load('vec/model_d.w2v')
    model_w = Word2Vec.load('vec/model_w.w2v')

    DataDict = {'edu1_position': [], 'edu2_position': [],
                'edu1_endsent': [], 'edu1_startsent': [],
                'edu2_endsent': [], 'edu2_startsent': [],
                'edu1_len': [], 'edu2_len': [],
                'same_tokens': [], 'distance': [],
                'attribution1': [], 'cause-effect1': [], 'concession1': [], 'condition1': [],
                'contrast1': [], 'elaboration1': [], 'joint1': [], 'purpose1': [],
                'attribution2': [], 'cause-effect2': [], 'concession2': [], 'condition2': [],
                'contrast2': [], 'elaboration2': [], 'joint2': [], 'purpose2': []}

    for pair in pairs:
        markers_dict1 = has_markers(pair.edu1.lemmatized_tokens)
        markers_dict2 = has_markers(pair.edu2.lemmatized_tokens)
        DataDict['edu1_position'].append(int(pair.edu1.position))
        DataDict['edu2_position'].append(int(pair.edu2.position))
        DataDict['edu1_endsent'].append(int(pair.edu1.sentence_end))
        DataDict['edu2_endsent'].append(int(pair.edu2.sentence_end))
        DataDict['edu1_startsent'].append(int(pair.edu1.sentence_start))
        DataDict['edu2_startsent'].append(int(pair.edu2.sentence_start))
        DataDict['edu1_len'].append(len(pair.edu1.tokens))
        DataDict['edu2_len'].append(len(pair.edu2.tokens))
        # number of shared tokens (lemmas)
        DataDict['same_tokens'].append(len(set(pair.edu1.lemmatized_tokens).intersection(
            pair.edu2.lemmatized_tokens)))
        DataDict['distance'].append(int(pair.edu2.position) - int(pair.edu1.position) - 1)
        for rel_name in ['attribution', 'cause-effect', 'concession', 'condition',
                         'contrast', 'elaboration', 'joint', 'purpose']:
            DataDict[rel_name + '1'].append(markers_dict1[rel_name])
            DataDict[rel_name + '2'].append(markers_dict2[rel_name])

    X = pd.DataFrame(DataDict)
    feature_names.extend(X.columns)

    # word n-gram vectorizer
    all_texts = [pair.edu1.text for pair in pairs] + [pair.edu2.text for pair in pairs]
    c_vect.fit(all_texts)
    # edus1_vect = c_vect.transform([pair.edu1.text for pair in pairs])
    # edus2_vect = c_vect.transform([pair.edu2.text for pair in pairs])
    feature_names.extend([i + '1' for i in c_vect.get_feature_names()])
    feature_names.extend([i + '2' for i in c_vect.get_feature_names()])

    # POS-tag n-gram vectorizer
    all_pos = [pair.edu1.pos for pair in pairs] + [pair.edu2.pos for pair in pairs]
    pos_vect.fit(all_pos)
    pos1_vect = pos_vect.transform([pair.edu1.pos for pair in pairs])
    pos2_vect = pos_vect.transform([pair.edu2.pos for pair in pairs])
    feature_names.extend([i + '1' for i in pos_vect.get_feature_names()])
    feature_names.extend([i + '2' for i in pos_vect.get_feature_names()])

    # Doc2Vec: one vector per EDU
    d2v1 = csr_matrix(np.array([model_d.infer_vector(pair.edu1.tokens) for pair in pairs]))
    d2v2 = csr_matrix(np.array([model_d.infer_vector(pair.edu2.tokens) for pair in pairs]))
    feature_names.extend(['d2v1' + str(i) for i in range(100)])
    feature_names.extend(['d2v2' + str(i) for i in range(100)])

    # Word2Vec: vectors of the first and last token of each EDU
    w2v1_first = csr_matrix(np.array([w2v_word_vector(model_w, pair.edu1.tokens[0]) for pair in pairs]))
    w2v2_first = csr_matrix(np.array([w2v_word_vector(model_w, pair.edu2.tokens[0]) for pair in pairs]))
    w2v1_last = csr_matrix(np.array([w2v_word_vector(model_w, pair.edu1.tokens[-1]) for pair in pairs]))
    w2v2_last = csr_matrix(np.array([w2v_word_vector(model_w, pair.edu2.tokens[-1]) for pair in pairs]))
    feature_names.extend(['w2v1_first' + str(i) for i in range(100)])
    feature_names.extend(['w2v2_first' + str(i) for i in range(100)])
    feature_names.extend(['w2v1_last' + str(i) for i in range(100)])
    feature_names.extend(['w2v2_last' + str(i) for i in range(100)])

    # # SYNTAX: Word2Vec vectors and POS tags of each EDU's dependency head
    # head_ids_edu1 = [detect_head(mp.parse_one(pair.edu1.tokens)) for pair in pairs]
    # head_ids_edu2 = [detect_head(mp.parse_one(pair.edu2.tokens)) for pair in pairs]
    # head_vectors_edu1 = csr_matrix(np.array([w2v_word_vector(model_w, pairs[i].edu1.tokens[head_ids_edu1[i]]) for i in range(len(pairs))]))
    # head_vectors_edu2 = csr_matrix(np.array([w2v_word_vector(model_w, pairs[i].edu2.tokens[head_ids_edu2[i]]) for i in range(len(pairs))]))
    # head_pos_edu1 = pos_vect.transform([[pairs[i].edu1.pos[head_ids_edu1[i]]] for i in range(len(pairs))])
    # head_pos_edu2 = pos_vect.transform([[pairs[i].edu2.pos[head_ids_edu2[i]]] for i in range(len(pairs))])

    # X_sparse = csr_matrix(np.array(X))
    # X_concat = hstack((X_sparse, edus1_vect, edus2_vect, pos1_vect, pos2_vect, d2v1, d2v2,
    #                    w2v1_first, w2v2_first, w2v1_last, w2v2_last))

    print(len(feature_names))
    return feature_names
def drt_discourse_demo(reading_command=None):
    """
    Illustrate the various methods of C{DiscourseTester}
    """
    dt = DiscourseTester(['every dog chases a boy', 'he runs'],
                         reading_command)
    dt.models()
    print
    dt.sentences()
    print
    dt.readings()
    print
    dt.readings(show_thread_readings=True)
    print
    dt.readings(filter=True, show_thread_readings=True)


def spacer(num=30):
    print '-' * num


if __name__ == '__main__':
    discourse_demo()

    tagger = RegexpTagger([('^(chases|runs)$', 'VB'),
                           ('^(a)$', 'ex_quant'),
                           ('^(every)$', 'univ_quant'),
                           ('^(dog|boy)$', 'NN'),
                           ('^(he)$', 'PRP')])
    depparser = MaltParser(tagger=tagger)
    drt_discourse_demo(DrtGlueReadingCommand(remove_duplicates=False,
                                             depparser=depparser))
    # S VP NP Bob <-> Bob Np VP S
    # always the reversed list goes first and that's it?
    sublist1 = path1[j:]
    # print("sublist1", sublist1)
    if j < len(path2) - 1:
        j = j + 1
    sublist2 = path2[j:]
    # print("sublist2", sublist2)
    sublist2.reverse()
    # print("sublist2", sublist2)
    shortestpath = sublist2 + sublist1
    return shortestpath


dparser = MaltParser('../data/grammars/maltparser-1.8.1/', 'engmalt.linear-1.7.mco')
pt = dparser.parse_one('I shot an elephant in my pajamas .'.split()).tree()
# print(pt)
# print(shortestPath(pt, 'I', 'pajamas'))
# print(shortestPath(pt, 'elephant', 'pajamas'))
# print(shortestPath(pt, 'I', 'elephant'))

# parsing many sentences
tagged_sents = [
    "The other day I went to the beach.".split(),
    "It was a hot day so I swimmed in the water.".split()
class Glue(object):
    def __init__(self, semtype_file=None, remove_duplicates=False,
                 depparser=None, verbose=False):
        self.verbose = verbose
        self.remove_duplicates = remove_duplicates
        self.depparser = depparser

        if semtype_file:
            self.semtype_file = semtype_file
        else:
            self.semtype_file = 'glue.semtype'

    def train_depparser(self, depgraphs=None):
        if depgraphs:
            self.depparser.train(depgraphs)
        else:
            self.depparser.train_from_file(data.find(
                os.path.join('grammars', 'sample_grammars', 'glue_train.conll')))

    def parse_to_meaning(self, sentence):
        readings = []
        for agenda in self.parse_to_compiled(sentence):
            readings.extend(self.get_readings(agenda))
        return readings

    def get_readings(self, agenda):
        readings = []
        agenda_length = len(agenda)
        atomics = dict()
        nonatomics = dict()
        while agenda:  # is not empty
            cur = agenda.pop()
            glue_simp = cur.glue.simplify()
            if isinstance(glue_simp, linearlogic.ImpExpression):  # if cur.glue is non-atomic
                for key in atomics:
                    try:
                        if isinstance(cur.glue, linearlogic.ApplicationExpression):
                            bindings = cur.glue.bindings
                        else:
                            bindings = linearlogic.BindingDict()
                        glue_simp.antecedent.unify(key, bindings)
                        for atomic in atomics[key]:
                            if not (cur.indices & atomic.indices):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(cur.applyto(atomic))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                    except linearlogic.UnificationException:
                        pass
                try:
                    nonatomics[glue_simp.antecedent].append(cur)
                except KeyError:
                    nonatomics[glue_simp.antecedent] = [cur]

            else:  # else cur.glue is atomic
                for key in nonatomics:
                    for nonatomic in nonatomics[key]:
                        try:
                            if isinstance(nonatomic.glue, linearlogic.ApplicationExpression):
                                bindings = nonatomic.glue.bindings
                            else:
                                bindings = linearlogic.BindingDict()
                            glue_simp.unify(key, bindings)
                            if not (cur.indices & nonatomic.indices):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(nonatomic.applyto(cur))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                        except linearlogic.UnificationException:
                            pass
                try:
                    atomics[glue_simp].append(cur)
                except KeyError:
                    atomics[glue_simp] = [cur]

        for entry in atomics:
            for gf in atomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        for entry in nonatomics:
            for gf in nonatomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        return readings

    def _add_to_reading_list(self, glueformula, reading_list):
        add_reading = True
        if self.remove_duplicates:
            for reading in reading_list:
                try:
                    if reading.tp_equals(glueformula.meaning, 'Prover9'):
                        add_reading = False
                        break
                except:
                    # if there is an exception, the syntax of the formula
                    # may not be understandable by the prover, so don't
                    # throw out the reading.
                    pass
        if add_reading:
            reading_list.append(glueformula.meaning)

    def parse_to_compiled(self, sentence='a man sees Mary'):
        gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
        return [self.gfl_to_compiled(gfl) for gfl in gfls]

    def dep_parse(self, sentence='every cat leaves'):
        # Lazy-initialize the depparser
        if self.depparser is None:
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return [self.depparser.parse(sentence, verbose=self.verbose)]

    def depgraph_to_glue(self, depgraph):
        return self.get_glue_dict().to_glueformula_list(depgraph)

    def get_glue_dict(self):
        return GlueDict(self.semtype_file)

    def gfl_to_compiled(self, gfl):
        index_counter = Counter()
        return_list = []
        for gf in gfl:
            return_list.extend(gf.compile(index_counter))

        if self.verbose:
            print 'Compiled Glue Premises:'
            for cgf in return_list:
                print cgf

        return return_list

    def get_pos_tagger(self):
        regexp_tagger = RegexpTagger(
            [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
             (r'(The|the|A|a|An|an)$', 'AT'),   # articles
             (r'.*able$', 'JJ'),                # adjectives
             (r'.*ness$', 'NN'),                # nouns formed from adjectives
             (r'.*ly$', 'RB'),                  # adverbs
             (r'.*s$', 'NNS'),                  # plural nouns
             (r'.*ing$', 'VBG'),                # gerunds
             (r'.*ed$', 'VBD'),                 # past tense verbs
             (r'.*', 'NN')                      # nouns (default)
             ])
        brown_train = brown.tagged_sents(categories='news')
        unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
        bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
        trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)

        # Override particular words
        main_tagger = RegexpTagger(
            [(r'(A|a|An|an)$', 'ex_quant'),
             (r'(Every|every|All|all)$', 'univ_quant')
             ], backoff=trigram_tagger)

        return main_tagger
def get_maltparse_tagger():
    maltparse_dir = os.environ['MALTPARSE_DIR']
    maltparse_model = os.path.join(maltparse_dir, 'TRL_maltparser_modul_ES.rar')
    ## Provided by IULA as a pretrained model, but it needs to be a .mco file, not a .rar archive.
    return MaltParser(maltparse_dir)
]

# hack for subprocess.DEVNULL on python 2.7
try:
    from subprocess import DEVNULL  # py3k
except ImportError:
    import os
    import subprocess
    subprocess.DEVNULL = open(os.devnull, 'wb')

os.environ['STANFORD_MODELS'] = 'libs/stanford-postagger-2018-10-16/models'
tagger = StanfordPOSTagger(
    'english-bidirectional-distsim.tagger',
    'libs/stanford-postagger-2018-10-16/stanford-postagger.jar')
parser = MaltParser(os.path.dirname(os.path.abspath(__file__)) + '/libs/maltparser-1.9.1',
                    'libs/engmalt.linear-1.7.mco',
                    tagger=tagger.tag)
stemmer = WordNetLemmatizer()


def wsd_of(tree, node):
    head, pobj = getLink(tree, node, 'head'), getLink(tree, node, 'dep:pobj')
    if head['tag'] == 'CD' or head['word'] == 'many':  # 3 of them
        return 'f_part_whole'
    elif pobj['tag'] == 'CD':  # a total of 20
        return 'Scale_value'
    else:
        return 'Entity_association'


frames = {
Features
1. unigrams
2. bigrams
3.
'''

# MaltParser for dependency triples
# how to get maltparser to work: http://stackoverflow.com/questions/13207394/step-by-step-to-getting-malt-parser-in-nltk-to-work
set_maltparser = "export MALT_PARSER=%s" % (
    os.path.abspath("maltparser-1.8.1"))
set_maltmodel = "export MALT_MODEL=%s" % (
    os.path.abspath("engmalt.linear-1.7.mco"))
os.system(set_maltmodel)
os.system(set_maltparser)

mp = MaltParser('maltparser-1.8.1', 'engmalt.linear-1.7.mco')

# Named entity tagger
# add the jar and model via their paths:
ner_jar = os.path.abspath("stanford-ner-2016-10-31/stanford-ner-3.7.0.jar")
ner_model = os.path.abspath(
    "stanford-ner-2016-10-31/classifiers/english.all.3class.distsim.crf.ser.gz")
ner_st = StanfordNERTagger(ner_model, ner_jar)

pos_jar = os.path.abspath(
    "stanford-postagger-2016-10-31/stanford-postagger.jar")
pos_model = os.path.abspath(
    "stanford-postagger-2016-10-31/models/english-bidirectional-distsim.tagger")
pos_st = StanfordPOSTagger(pos_model, pos_jar)
    # always the reversed list goes first and that's it?
    sublist1 = path1[j:]
    # print("sublist1", sublist1)
    if j < len(path2) - 1:
        j = j + 1
    sublist2 = path2[j:]
    # print("sublist2", sublist2)
    sublist2.reverse()
    # print("sublist2", sublist2)
    shortestpath = sublist2 + sublist1
    return shortestpath


dparser = MaltParser('../data/grammars/maltparser-1.8.1/', 'engmalt.linear-1.7.mco')
pt = dparser.parse_one('I shot an elephant in my pajamas .'.split()).tree()
# print(pt)
# print(shortestPath(pt, 'I', 'pajamas'))
# print(shortestPath(pt, 'elephant', 'pajamas'))
# print(shortestPath(pt, 'I', 'elephant'))

# parsing many sentences
tagged_sents = [
from role.corpus import *
from nltk.corpus import brown
from nltk.sem.glue import *

nltk.sem.logic._counter._value = 0

from nltk.parse.malt import MaltParser

# file = read_stanza_document_file("role/corpus/kant/fpmm_1_stanza.ann")
# tagged = [[(w.text, w.xpos) for w in s.words] for s in file.sentences]

brown_train = brown.tagged_sents(categories="news")
unigram_tagger = UnigramTagger(brown_train)
# bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
# trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)
main_tagger = RegexpTagger(
    [(r"(A|a|An|an|The|the)$", "ex_quant"),
     (r"(Every|every|All|all|Any|any)$", "univ_quant")],
    backoff=unigram_tagger,
)

depparser = MaltParser('./maltparser-1.9.2', tagger=main_tagger.tag)
glue = DrtGlue(depparser=depparser)

print(main_tagger.tag(
    "The grand jury produced no evidence that any irregularities took place".split()))
readings = glue.parse_to_meaning(
    "The grand jury produced no evidence that any irregularities took place".split())
def __init__(self, version="maltparser-1.9.2", model="engmalt.linear-1.7.mco"):
    super().__init__()
    root = os.getcwd()
    version_path = os.path.join(root, "schemata", "parse", "maltparser", version)
    model_path = os.path.join(root, "schemata", "parse", "maltparser", model)
    self.base = MP(version_path, model_path)