def do_nltk_parsing(sentences):
    parser = parse.load_parser('venv/simple_grammar.fcfg', trace=2)
    for sentence in sentences:
        tokens = sentence.split()
        trees = parser.parse(tokens)
        for tree in trees:
            print(tree)
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)
    :return: a mapping from input sentences to a list of ``Tree`` instances.
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses
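# Usage sketch (an addition, not from the original sources): call parse_sents
# with the feat0.fcfg grammar that ships with the NLTK data package; the
# sentence and grammar path follow chapter 9 of the NLTK book.
for tree in parse_sents(['Kim likes children'],
                        'grammars/book_grammars/feat0.fcfg')[0]:
    print(tree)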
def batch_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list of str
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :rtype: dict
    :return: a mapping from input sentences to a list of ``Tree``s
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        # nbest_parse() was removed in NLTK 3; parse() returns an iterator
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses
def __init__(self, gramfile=None):
    """
    :param gramfile: name of file where grammar can be loaded
    :type gramfile: str
    """
    self._gramfile = (gramfile if gramfile
                      else 'grammars/book_grammars/discourse.fcfg')
    self._parser = load_parser(self._gramfile)
def __init__(self):
    self.parser = parse.load_parser('base_parse.fcfg', trace=1)
    self.adj_num = 0
    self.rel_num = 0
    self.tot_adj = 0
    self.linking_blocks = []
    self.sem_blocks = []
def sentence_analysis(sent, out=True):
    cp = parse.load_parser('pt_grammar.fcfg', trace=1 if out else 0)
    san = sent.strip(',.').lower()
    tokens = san.split()
    try:
        trees = cp.parse(tokens)
        for tree in trees:
            if out:
                print(tree)
        return True
    except ValueError:
        # "This sentence is not valid or the grammar is not yet complete..."
        if out:
            print("Esta sentenca nao e valida ou a gramatica ainda nao esta completa...")
        return False
def demo():
    cp = parse.load_parser('file:rdf.fcfg', trace=0)
    tokens = 'list the actors in the_shining'.split()
    # nbest_parse() was removed in NLTK 3; parse() returns an iterator
    trees = list(cp.parse(tokens))
    tree = trees[0]
    semrep = sem.root_semrep(tree)
    trans = SPARQLTranslator()
    trans.translate(semrep)
    print(trans.query)
def nltkTest(sentence='der Hund folgt der Katze'):
    import nltk
    from nltk import grammar, parse
    cp = parse.load_parser('lib/nltk_data/grammars/book_grammars/german.fcfg',
                           trace=0)
    # the hard-coded German sentence is kept as the default argument
    tokens = sentence.split()
    trees = cp.parse(tokens)
    for tree in trees:
        print(tree)
def parse_with_bindops(sentence, grammar=None, trace=0):
    """
    Use a grammar with Binding Operators to parse a sentence.
    """
    if not grammar:
        grammar = 'grammars/book_grammars/storage.fcfg'
    parser = load_parser(grammar, trace=trace,
                         chart_class=InstantiateVarsChart)
    # Parse the sentence (nbest_parse() was removed in NLTK 3).
    tokens = sentence.split()
    return list(parser.parse(tokens))
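# Usage sketch (an addition): cooper-storage parsing as in the NLTK demo,
# assuming the storage.fcfg grammar resolves via nltk_data.
trees = parse_with_bindops('every girl chases a dog', trace=0)
semrep = trees[0].label()['SEM']
print(semrep)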
def main():
    cp = parse.load_parser('grammar.fcfg', trace=1,
                           chart_class=InstantiateVarsChart)
    print(cp.grammar())
    # `phrases` and `analyses` are assumed to be module-level globals;
    # a stray exit(1) that made this loop unreachable has been dropped.
    for (i, phrase) in enumerate(phrases):
        default = "N.A."
        for (tree, formula) in analyses[i]:
            default = str(formula)
        print("Phrase: {}\nTranslation: {}".format(phrase, default))
def parser(sentence, path_to_grammar):
    """
    Load NLTK's parser and parse the given sentence.

    :param sentence: sentence to parse
    :param path_to_grammar: path to the grammar
    :return: all parse trees for the given sentence
    """
    parser = parse.load_parser(path_to_grammar)
    tokens = sentence.split()
    trees = parser.parse(tokens)
    return trees
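# Usage sketch (an addition): the helper above with the German grammar from
# nltk_data; the grammar and sentence appear in the nltkTest example earlier.
for tree in parser('der Hund folgt der Katze',
                   'grammars/book_grammars/german.fcfg'):
    print(tree)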
def analyze_sentence(sentence):
    # http://www.nltk.org/book/ch09.html#code-feat0cfg
    cp = parse.load_parser('grammar.fcfg', trace=0)
    tokens = sentence.split()
    found = False
    try:
        for tree in cp.parse(tokens):
            print(tree)
            found = True
    except ValueError as e:
        print(e)
    if not found:
        print("False.")
    else:
        print()
def main(args):
    """
    Main entry point for the program
    """
    # Load grammar from .fcfg file
    print("-------------Loading grammar---------------------")
    nlp_grammar = parse.load_parser(args.rule_file_name, trace=0)
    print("Grammar loaded at {}".format(args.rule_file_name))
    write_file(1, str(nlp_grammar.grammar()))
    question = args.question

    # Get parse tree
    print("-------------Parsed structure-------------")
    tree = nlp_grammar.parse_one(question.replace('?', '').split())
    print(question)
    print(tree)
    write_file(2, str(tree))

    # Parse to logical form
    print("-------------Parsed logical form-------------")
    logical_form = str(tree.label()['SEM']).replace(',', ' ')
    print(logical_form)
    write_file(3, str(logical_form))

    # Get procedure semantics
    print("-------------Procedure semantics-------------")
    procedure_semantics = parse_to_procedure(tree)
    print(procedure_semantics['str'])
    write_file(4, procedure_semantics['str'])

    # Retrieve result
    print("-------------Retrieved result-------------")
    results = retrieve_result(procedure_semantics)
    if len(results) == 0:
        print("No result found!")
    else:
        for result in results:
            print(result, end=' ', flush=True)
        print('')
        write_file(5, " ".join(results))
import sys
import codecs

import nltk
from nltk.corpus import machado
from nltk import grammar, parse
from nltk.parse.featurechart import InstantiateVarsChart

sent_tokenizer = nltk.data.load('tokenizers/punkt/portuguese.pickle')

raw_text1 = machado.raw('romance/marm05.txt')
raw_text2 = machado.raw('romance/marm04.txt')
raw_text3 = machado.raw('romance/marm03.txt')

ptext1 = nltk.Text(machado.words('romance/marm01.txt'))
ptext2 = nltk.Text(machado.words('romance/marm02.txt'))
ptext3 = nltk.Text(machado.words('romance/marm03.txt'))
ptext4 = nltk.Text(machado.words('romance/marm04.txt'))

cp = parse.load_parser('grammars/book_grammars/feat0.fcfg', trace=1)
stemmer = nltk.stem.RSLPStemmer()

# Check which interpreter is running the benchmark
if 'PyPy' in sys.version:
    version = 'PyPy {}'.format(sys.version)
else:
    version = 'CPython {}'.format(sys.version)
report.setup('PyPy' in version)


def mute():
    # use codecs to avoid decoding errors
    sys.stdout = codecs.open('/dev/null', 'w', 'utf8')


def unmute():
    sys.stdout = sys.__stdout__
from hmm_pcfg_files.tools import read_from_file
import numpy as np
import pandas as pd
import csv
import sys, time
from nltk import tokenize
from nltk.parse import ViterbiParser
from nltk.grammar import toy_pcfg1, toy_pcfg2
from nltk import grammar, parse
import nltk

test_sent = read_from_file("hmm_pcfg_files/dev_sents")
test_sent = [test[0].split(" ") for test in test_sent]

cp = parse.load_parser('hmm_pcfg_files/pcfg', trace=1, format='pcfg')
s = nltk.data.load('hmm_pcfg_files/pcfg', 'text')

# The loop originally wrote an undefined name `output`; writing the first
# parse of each sentence is one plausible reading of the intent. The explicit
# f.close() is redundant inside the `with` block.
with open('hmm_pcfg_files/parses/candidate-parses', 'w') as f:
    for sentence in test_sent:
        for output in cp.parse(sentence):
            f.write(str(output) + "\n")
            break
print(dexpr(s))

# The fol() method converts DRSs into FOL formulae.
print(dexpr(r'([x],[man(x), walks(x)])').fol())
print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())

# To visualize a DRS, the pretty_format() method can be used.
print(drs3.pretty_format())

# PARSE TO SEMANTICS
# DRSs can be used for building compositional semantics in a feature-based
# grammar. To specify that we want DRSs, the appropriate logic parser needs
# to be passed as a parameter to load_parser().
from nltk.parse import load_parser
from nltk.sem.drt import DrtParser
parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0,
                     logic_parser=DrtParser())
for tree in parser.parse('a dog barks'.split()):
    print(tree.label()['SEM'].simplify())

# Alternatively, a FeatStructReader can be passed with the logic_parser set on it.
from nltk.featstruct import FeatStructReader
from nltk.grammar import FeatStructNonterminal
parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0,
                     fstruct_reader=FeatStructReader(
                         fdict_class=FeatStructNonterminal,
                         logic_parser=DrtParser()))
for tree in parser.parse('every girl chases a dog'.split()):
    print(tree.label()['SEM'].simplify().normalize())
import nltk
from nltk import grammar, parse

cp = parse.load_parser('base_parse.fcfg', trace=1)

sent = 'the big blue box between the small red squares'
tokens = [x.lower() for x in sent.split()]
trees = cp.parse(tokens)
for line in trees:
    line.draw()
'''
Created on 09 Dec 2012

@author: burakkerim
'''

import sys

from nltk.parse import load_parser

cp = load_parser('file:extended.fcfg')

sentences = [
    #----------------------------------
    # POSITIVES - already covered by the grammar
    #----------------------------------
##    ' ALREADY POSITIVES',
##    'Mary likes John',
##    'a boy disappeared',
##    'John eats sandwiches',
##    'a boy finds cats',
##    'the boy finds cats',
##    'Kim believes John likes her',
##    'the students vanished with the telescope',
##    'every woman likes John',
##    'Kim believes John likes her',
    #----------------------------------
    # MISSING - add these to the grammar
    #----------------------------------
##    ' POSITIVES',
    'the dog chased the cat which ate the mouse',
    'people chase Sue who ate the unicorn which Tom saw',
def main(args):
    """
    Main entry point for the program
    """
    if args.language == "english":
        # English version
        # Load grammar from .fcfg file
        print("-------------Loading grammar---------------------")
        nlp_grammar = parse.load_parser(args.rule_file_name, trace=0)
        print("Grammar loaded at {}".format(args.rule_file_name))
        write_file(1, str(nlp_grammar.grammar()))
        question = args.question

        # Get parse tree (English)
        print("-------------Parsed structure-------------")
        tree = nlp_grammar.parse_one(question.replace('?', '').split())
        print(question)
        print(tree)
        write_file(2, str(tree))

        # Parse to logical form
        print("-------------Parsed logical form-------------")
        logical_form = str(tree.label()['SEM']).replace(',', ' ')
        print(logical_form)
        write_file(3, str(logical_form))

        # Get procedure semantics
        print("-------------Procedure semantics-------------")
        procedure_semantics = parse_to_procedure(tree)
        print(procedure_semantics['str'])
        write_file(4, procedure_semantics['str'])

        # Retrieve result
        print("-------------Retrieved result-------------")
        results = retrieve_result(procedure_semantics)
        if len(results) == 0:
            print("No result found!")
        else:
            for result in results:
                print(result, end=' ', flush=True)
            print('')
            write_file(5, " ".join(results))
    else:
        # Vietnamese version
        # Load grammar from .fcfg file
        # print("-------------Loading grammar---------------------")
        # nlp_grammar = parse.load_parser(args.rule_file_name, trace=0)
        # print("Grammar loaded at {}".format(args.rule_file_name))
        # write_file(1, str(nlp_grammar.grammar()))
        question = args.question
        visualize = args.visualize

        # Get parse tree (Vietnamese)
        print("-------------Parsed structure-------------")
        print(question)
        # tree = nlp_grammar.parse_one(question.replace('?', '').split())
        tree, token_def, doc = spacy_viet(
            question.replace('?', '').replace(':', '').replace('.', ''),
            visualize)
        write_file(2, str(tree) + "\n" + token_def)

        print("-------------Parsed logical form-------------")
        from code_featstructures import mainLogic
        featStructCfg = mainLogic(doc)

        # Parse to logical form
        logical_form = featStructCfg['sem']
        # print(logical_form)
        write_file(3, str(logical_form))

        from nlp_featstruct_parser import code_featstructures_to_procedure

        # Get procedure semantics
        print("-------------Procedure semantics-------------")
        procedure_semantics = code_featstructures_to_procedure(featStructCfg)
        print(procedure_semantics['str'])
        write_file(4, procedure_semantics['str'])

        # Retrieve result
        print("-------------Retrieved result-------------")
        results = retrieve_result(procedure_semantics)
        if len(results) == 0:
            print("No result found!")
        else:
            for result in results:
                print(result, end=' ', flush=True)
            print('')
            write_file(5, " ".join(results))
def loadGrammar(self, grammarFilename):
    self.parser = parse.load_parser(grammarFilename, trace=1, cache=False)
'''
Chandu Budati
CSCI 6350-001
Project #4
Due: 03/23/2018
File Description: This file contains all functions required to run the
grammar checker program; the actual grammar is in p4grammar.fcfg
'''

from nltk import grammar, parse, tree

fname = "sents.txt"  # input("file address: ")

# load the parsing grammar
cp = parse.load_parser('p4grammar.fcfg', trace=0,
                       parser=parse.FeatureEarleyChartParser)

# import the test data
f = open(fname, 'r')
testdata = f.readlines()
f.close()
testdata = [line.strip() for line in testdata]

parsed = []
for sent in testdata:
    tokens = sent.split()  # generate tokens
    trees = list(cp.parse(tokens))
    if len(trees) == 0:
        parsed.append("")
        print()
    # for tree in trees:
def hole_readings(sentence, grammar_filename=None, verbose=False):
    if not grammar_filename:
        grammar_filename = "grammars/sample_grammars/hole.fcfg"

    if verbose:
        print("Reading grammar file", grammar_filename)
    parser = load_parser(grammar_filename)

    # Parse the sentence.
    tokens = sentence.split()
    trees = list(parser.parse(tokens))
    if verbose:
        print("Got %d different parses" % len(trees))

    all_readings = []
    for tree in trees:
        # Get the semantic feature from the top of the parse tree.
        sem = tree.label()["SEM"].simplify()

        # Print the raw semantic representation.
        if verbose:
            print("Raw:       ", sem)

        # Skolemize away all quantifiers. All variables become unique.
        while isinstance(sem, LambdaExpression):
            sem = sem.term
        skolemized = skolemize(sem)

        if verbose:
            print("Skolemized:", skolemized)

        # Break the hole semantics representation down into its components
        # i.e. holes, labels, formula fragments and constraints.
        hole_sem = HoleSemantics(skolemized)

        # Maybe show the details of the semantic representation.
        if verbose:
            print("Holes:      ", hole_sem.holes)
            print("Labels:     ", hole_sem.labels)
            print("Constraints:", hole_sem.constraints)
            print("Top hole:   ", hole_sem.top_hole)
            print("Top labels: ", hole_sem.top_most_labels)
            print("Fragments:")
            for l, f in hole_sem.fragments.items():
                print("\t%s: %s" % (l, f))

        # Find all the possible ways to plug the formulas together.
        pluggings = hole_sem.pluggings()

        # Build FOL formula trees using the pluggings.
        readings = list(map(hole_sem.formula_tree, pluggings))

        # Print out the formulas in a textual format.
        if verbose:
            for i, r in enumerate(readings):
                print()
                print("%d. %s" % (i, r))
            print()

        all_readings.extend(readings)

    return all_readings
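# Usage sketch (an addition): enumerate hole-semantics readings for a
# scope-ambiguous sentence, as in the NLTK hole-semantics demo; assumes
# hole.fcfg resolves via nltk_data.
for i, reading in enumerate(hole_readings('every girl chases a dog')):
    print(i, reading)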
def nbest_parse(self, xx):
    parser = parse.load_parser('file:hw2.fcfg', trace=2)
    wordlist = xx.split()
    # nbest_parse() was removed in NLTK 3; parse() returns an iterator
    trees = parser.parse(wordlist)
    for tree in trees:
        print(tree)
tbwc = tb.word_counts
srtd = sorted(tbwc, key=tbwc.get, reverse=True)
notinlist = []
for w in srtd:
    if w not in fnagl:
        notinlist.append(w)
with open(r'notingloss.txt', 'w', encoding='utf-8') as f:
    for w in notinlist:
        print(w, file=f)

if False:
    from nltk import grammar, parse
    sent = ' to 1·5–2·3 cm. tall'
    tokens = ['to', '15', '-', '23', 'cm', '.', 'in', 'diam.']
    # tokens = ['to', '23', 'm', 'tall']
    cp = parse.load_parser('../resources/simplerange.fcfg', trace=2)
    trees = cp.parse(tokens)
    for tree in trees:
        print(tree)

if False:
    import linkgrammar as lg
    sents = re.split(r'(?<=\.)\s+(?=[A-Z])|;\s+', testtext)
    p = lg.Parser(lang="en", verbosity=1, max_null_count=10)
    for sent in sents:
        print(sent)
        linkages = p.parse_sent(sent)
        for linkage in linkages[0:1]:
            print(linkage.num_of_links,
                  linkage.constituent_phrases_nested)