def parse(s):
    """Parse a flat/nested JSON-like object string into an NLTK parse tree.

    The grammar accepts objects of the form ``{"key": value, ...}`` where a
    value is a nested object, a quoted string, or a non-negative integer.
    Keys and string values are built from ASCII letters and underscore.

    Args:
        s: Input string; leading/trailing whitespace is stripped before
           tokenization.

    Returns:
        A list holding at most the first parse tree (empty if *s* does not
        parse under the grammar).
    """
    # Fixed part of the grammar: object structure (S), key/value pairs (A)
    # and non-negative integer literals (N).
    grammar = """
        S -> '{' A '}'
        A -> '"' P '"' ':' S | '"' P '"' ':' S ',' A | '"' P '"' ':' '"' P '"' ',' A | '"' P '"' ':' N ',' A | '"' P '"' ':' '"' P '"' | '"' P '"' ':' N
        N -> N '0' | N '1' | N '2' | N '3' | N '4' | N '5' | N '6' | N '7' | N '8' | N '9' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
    """
    # Character productions for P: every ASCII letter plus underscore.
    # Built with str.join instead of the original repeated += concatenation
    # (clearer, and avoids quadratic string rebuilding).
    chars = string.ascii_lowercase + string.ascii_uppercase + "_"
    grammar += "P -> " + " | ".join(
        "P '{0}' | '{0}'".format(c) for c in chars
    )
    cfg = nltk.CFG.fromstring(grammar)
    parser = nltk.LeftCornerChartParser(cfg)
    s_tok = tokenize(s.strip())
    tree = [t for t in parser.parse(s_tok)][:1]
    return tree
def parse(s):
    """Parse a string of the language a^n b^n (n >= 1).

    Args:
        s: Input string; surrounding whitespace is stripped first.

    Returns:
        A list with at most one parse tree; empty when *s* is rejected.
    """
    cfg = nltk.CFG.fromstring("""
        S -> 'a' S 'b' | 'a' 'b'
    """)
    tokens = tokenize(s.strip())
    chart_parser = nltk.LeftCornerChartParser(cfg)
    # Materialize the (at most ambiguity-free) parses, keep the first.
    trees = list(chart_parser.parse(tokens))
    return trees[:1]
def parse(s):
    """Parse an arithmetic expression over '+', '*' and parentheses.

    Operands are non-negative integers built digit by digit; the grammar
    is ambiguous, so only the first parse tree found is kept.

    Args:
        s: Input string; surrounding whitespace is stripped first.

    Returns:
        A list with at most one parse tree; empty when *s* is rejected.
    """
    cfg = nltk.CFG.fromstring("""
        S -> '(' S ')' | S '*' S | S '+' S | NUM
        NUM -> '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '0' NUM | '1' NUM | '2' NUM | '3' NUM | '4' NUM | '5' NUM | '6' NUM | '7' NUM | '8' NUM | '9' NUM
    """)
    tokens = tokenize(s.strip())
    chart_parser = nltk.LeftCornerChartParser(cfg)
    trees = list(chart_parser.parse(tokens))
    return trees[:1]
def determine_chromosome_from(self, sequence):
    """Classify a chromosome by which shape grammar accepts *sequence*.

    The three grammars are tried in priority order (median, submedian,
    acrocentric); the label of the first grammar that yields at least one
    parse tree is returned.

    Args:
        sequence: Token sequence understood by the instance's grammars.

    Returns:
        "Median", "Submedian" or "Acrocentric", or None when no grammar
        accepts the sequence (the original fell through implicitly).
    """
    # Pair each grammar with its label instead of branching on a loop
    # index — replaces the original idx == 0 / 1 / else dispatch.
    labeled_grammars = (
        (self.median_grammar, "Median"),
        (self.submedian_grammar, "Submedian"),
        (self.acrocentric_grammar, "Acrocentric"),
    )
    for grammar, label in labeled_grammars:
        parser = nltk.LeftCornerChartParser(grammar)
        # any() short-circuits, so at most one parse is drawn from the
        # lazy iterator — same work as the original early return.
        if any(True for _ in parser.parse(sequence)):
            return label
    return None
def parse(s):
    """Parse text containing *emphasised* or _emphasised_ spans.

    Words are runs of ASCII letters and a few punctuation characters;
    emphasis is delimited by matching '*' or '_' markers.

    Args:
        s: Input string; surrounding whitespace is stripped first.

    Returns:
        A list with at most one parse tree; empty when *s* is rejected.
    """
    cfg = nltk.CFG.fromstring("""
        E -> A | E '*' A '*' E | E '_' A '_' E | '*' A '*' | '_' A '_'
        A -> A B | A C | A D | B | C | D
        B -> 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z'
        C -> 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
        D -> ' ' | '.' | ',' | '(' | ')'
    """)
    tokens = tokenize(s.strip())
    chart_parser = nltk.LeftCornerChartParser(cfg)
    trees = list(chart_parser.parse(tokens))
    return trees[:1]
def parse(s):
    """Parse a string over {A, B, C} with a regular grammar.

    The grammar encodes a finite automaton: each nonterminal Q0..Q5 is a
    state, each production a labelled transition, and the terminal-only
    alternatives mark accepting positions.

    Args:
        s: Input string; surrounding whitespace is stripped first.

    Returns:
        A list with at most one parse tree; empty when *s* is rejected.
    """
    cfg = nltk.CFG.fromstring("""
        Q0 -> 'A' Q2 | 'B' Q2 | 'C' Q1 | 'A' | 'B'
        Q1 -> 'A' Q3 | 'B' Q3 | 'C' Q4 | 'A' | 'B' | 'C'
        Q2 -> 'A' Q1 | 'B' Q1 | 'C' Q5
        Q3 -> 'A' Q4 | 'B' Q4 | 'C' Q2 | 'A' | 'B' | 'C'
        Q4 -> 'A' Q2 | 'B' Q2 | 'C' Q1 | 'A' | 'B'
        Q5 -> 'A' Q4 | 'B' Q4 | 'C' Q2 | 'A' | 'B' | 'C'
    """)
    tokens = tokenize(s.strip())
    chart_parser = nltk.LeftCornerChartParser(cfg)
    trees = list(chart_parser.parse(tokens))
    return trees[:1]
def ver18():
    """Benchmark three NLTK chart parsers on three Icelandic sentences.

    For every (parser, sentence) pair this prints the parse tree, times
    1000 parses via timeit_parser, and finally prints a PrettyTable with
    one row per parser, one column per sentence, plus row and column
    totals (seconds, rounded to 6 decimals).
    """
    grammar = get_grammar()
    # Header row of the results table.
    table = PrettyTable(
        ["Þáttari", "Setning 1", "Setning 2", "Setning 3", "Samtals"])
    # Three structurally different test sentences.
    sents = [
        "þú ert hestur",
        "ég ætla að vera besta sítrónan",
        "banani og gúrka borða appelsínu og okkur",
    ]
    # Each parser paired with its display name — replaces the original
    # two parallel lists (`parsers`, `parsers_strings`) indexed by j.
    parsers = [
        ("TopDownChartParser", nltk.TopDownChartParser(grammar)),
        ("BottomUpChartParser", nltk.BottomUpChartParser(grammar)),
        ("LeftCornerChartParser", nltk.LeftCornerChartParser(grammar)),
    ]
    # Running per-sentence totals across all parsers.
    sent_total_time = [0, 0, 0]
    for parser_name, parser in parsers:
        total = 0  # this parser's total over all sentences
        row = [parser_name]
        for i, sent in enumerate(sents):
            # Print the tree for this parser + sentence.
            print("{parser} þáttunartré fyrir setningu {i}:".format(
                parser=parser_name, i=i + 1))
            print_tree(parser, sent, True)
            print()
            # Time the parse; 'elapsed' avoids shadowing the stdlib
            # 'time' module name (the original used a local `time`).
            elapsed = timeit_parser(parser_name, sent, 1000)
            total += elapsed
            sent_total_time[i] += round(Decimal(elapsed), 6)
            row.append(round(Decimal(elapsed), 6))
        row.append(round(Decimal(total), 6))
        table.add_row(row)
    # Final row: per-sentence totals and the grand total.
    table.add_row(["Samtals"] + sent_total_time + [sum(sent_total_time)])
    print("Tafla:\n", table)
def validateSentence(self):
    """Check each sentence in self.sent against the instance grammar.

    Each sentence is stripped of ! . : , characters, tokenized and
    POS-tagged; the tag sequence is then parsed with a
    LeftCornerChartParser. Prints "Valid sentence" when at least one
    parse tree exists, "Invalid sentence" otherwise, and finally the
    elapsed time since self.startTime.

    input: list of sentences (self.sent)
    output: prints validity of each sentence
    """
    # The grammar never changes, so build the parser once instead of
    # once per sentence as the original did.
    rd_parser = nltk.LeftCornerChartParser(self.grammar)
    for s in self.sent:
        s = "".join(c for c in s if c not in ('!', '.', ':', ','))
        stoken = word_tokenize(s)
        # Tag exactly once — the original called nltk.pos_tag twice and
        # discarded the first result (`tagged` was unused).
        pos_tags = [pos for (_token, pos) in nltk.pos_tag(stoken)]
        # A sentence is valid iff the parser yields at least one tree;
        # next() pulls a single parse, matching the original's break.
        if next(iter(rd_parser.parse(pos_tags)), None) is None:
            print("Invalid sentence")
        else:
            print("Valid sentence")
    # NOTE(review): printed once after all sentences; the mangled source
    # made the original indentation ambiguous — confirm against history.
    print("Total time taken:", (time.time() - self.startTime))
CONJP -> "indicating" "that" ADVP -> "markedly" "different" N -> "We" | "different" "species" | "subspecies" | "use" V -> "showed" | "distinguish" | "is" | "can" CC -> "and" | "but" IN -> "from" | "of" ADJP -> "not" "arbitrary," """) # This grammar produces a decent parse that runs quickly. To do: refine my # knowledge of grammar and fix the CONJP structure, which is not a real tag. grammar4 = nltk.CFG.fromstring(""" S -> NP VP NP | NP VP NP VP NP | S CONJP S NP -> N | NP CC N | ADJ N | ADVP NP | NP PP | N N VP -> V | V VP |VP CC VP | V ADJP | "to" V | "found" "that" CONJP -> "indicating" "that" PP -> P NP ADVP -> ADV ADJ ADJP -> ADJ | "not" ADJ ADV -> "markedly" ADJ -> "arbitrary," | "different" | "one" CC -> "but" | "and" N -> "We" | "subspecies" | "use" | "another." | "howl" | "types," | "modulation" | "species" | "population" V -> "showed" | "is" | "can" | "be" | "used" | "distinguish" P -> "of" | "from" """) lc_parser = nltk.LeftCornerChartParser(grammar4) for tree in lc_parser.parse(sentence): print tree