Example #1
0
def parse(s):
    """Parse ``s`` as a small JSON-like object and return at most one tree.

    The grammar accepts a brace-delimited object whose keys are
    double-quoted names (letters and underscores). A value is a nested
    object, a double-quoted name, or a run of decimal digits.

    ``s`` is stripped and handed to the ``tokenize`` helper (defined
    outside this block) before parsing.

    Returns a list holding the first parse tree produced by the parser,
    or an empty list when the parser yields none.
    """
    cfg_text = """

	S -> '{' A '}'

	A -> '"' P '"' ':' S | '"' P '"' ':' S ',' A | '"' P '"' ':' '"' P '"' ',' A | '"' P '"' ':' N  ',' A | '"' P '"' ':' '"' P '"' | '"' P '"' ':' N 

	N -> N '0' | N '1' | N '2' | N '3' | N '4' | N '5' | N '6' | N '7' | N '8' | N '9' | '0' |'1' | '3' | '2' | '4' | '5' | '6' | '7' | '8' | '9'

	"""

    # Append the character productions: P is one or more ASCII letters
    # or underscores, generated here instead of being written out by hand.
    cfg_text += "P ->"
    cfg_text += "".join(
        " P '" + letter + "'|" + " '" + letter + "' |"
        for letter in string.ascii_lowercase
    )
    cfg_text += "".join(
        " P '" + letter + "' |" + " '" + letter + "' |"
        for letter in string.ascii_uppercase
    )
    cfg_text += " P '_' | '_'"

    cfg = nltk.CFG.fromstring(cfg_text)
    tokens = tokenize(s.strip())
    chart_parser = nltk.LeftCornerChartParser(cfg)
    # Every parse is collected first; only the leading one is kept.
    return list(chart_parser.parse(tokens))[:1]
Example #2
0
def parse(s):
    """Parse ``s`` against the a^n b^n grammar and return at most one tree.

    The grammar generates one or more 'a' tokens followed by the same
    number of 'b' tokens. ``s`` is stripped and passed through the
    ``tokenize`` helper (defined outside this block) before parsing.

    Returns a list holding the first parse tree produced by the parser,
    or an empty list when the parser yields none.
    """
    cfg = nltk.CFG.fromstring("""
    S -> 'a' S 'b' | 'a' 'b' 
    """)
    tokens = tokenize(s.strip())
    # Every parse is collected first; only the leading one is kept.
    all_trees = list(nltk.LeftCornerChartParser(cfg).parse(tokens))
    return all_trees[:1]
Example #3
0
def parse(s):
    """Parse ``s`` as an arithmetic expression and return at most one tree.

    The grammar covers digit strings combined with '+' and '*' and
    grouped with parentheses. It encodes no operator precedence or
    associativity, so an input may have several parses; only the first
    one the parser produces is returned.

    ``s`` is stripped and passed through the ``tokenize`` helper (defined
    outside this block) before parsing.

    Returns a list holding that first tree, or an empty list when the
    parser yields none.
    """
    cfg = nltk.CFG.fromstring("""
    S ->'(' S ')' | S '*' S | S '+' S | NUM
    NUM -> '0' |'1' | '2' | '3' |'4' | '5' | '6' | '7' | '8' | '9' | '0' NUM | '1' NUM | '2' NUM | '3' NUM | '4' NUM | '5' NUM | '6' NUM | '7' NUM | '8' NUM | '9' NUM 
    """)
    tokens = tokenize(s.strip())
    # Every parse is collected first; only the leading one is kept.
    all_trees = list(nltk.LeftCornerChartParser(cfg).parse(tokens))
    return all_trees[:1]
Example #4
0
 def determine_chromosome_from(self, sequence):
     """Classify ``sequence`` by the first grammar that can parse it.

     The median, submedian and acrocentric grammars are tried in that
     order. As soon as one of them yields a parse tree, the matching
     label ("Median", "Submedian" or "Acrocentric") is returned.

     Returns None when none of the three grammars produces a parse.
     """
     labelled_grammars = (
         ("Median", self.median_grammar),
         ("Submedian", self.submedian_grammar),
         ("Acrocentric", self.acrocentric_grammar),
     )
     for label, grammar in labelled_grammars:
         chart_parser = nltk.LeftCornerChartParser(grammar)
         # The tree itself is irrelevant; one parse is enough to decide.
         for _tree in chart_parser.parse(sequence):
             return label
     return None
Example #5
0
def parse(s):
    """Parse ``s`` as text with emphasis markers and return at most one tree.

    The grammar describes runs of ASCII letters, spaces and the
    punctuation ``. , ( )``. A run may be wrapped in a pair of '*' or
    '_' markers, and wrapped runs may sit between other expressions.

    ``s`` is stripped and passed through the ``tokenize`` helper (defined
    outside this block) before parsing.

    Returns a list holding the first parse tree produced by the parser,
    or an empty list when the parser yields none.
    """
    cfg = nltk.CFG.fromstring("""
    E -> A | E '*' A '*' E | E '_' A '_' E | '*' A '*' | '_' A '_' 
    A -> A B | A C | A D | B | C | D
    B -> 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z'
    C -> 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
    D -> ' ' | '.' | ',' | '(' | ')'
    """)
    tokens = tokenize(s.strip())
    # Every parse is collected first; only the leading one is kept.
    all_trees = list(nltk.LeftCornerChartParser(cfg).parse(tokens))
    return all_trees[:1]
Example #6
0
def parse(s):
    """Parse ``s`` against a right-linear grammar and return at most one tree.

    The grammar uses the terminals 'A', 'B' and 'C' and the nonterminals
    Q0 to Q5. Every production is either a single terminal or a terminal
    followed by one nonterminal.

    ``s`` is stripped and passed through the ``tokenize`` helper (defined
    outside this block) before parsing.

    Returns a list holding the first parse tree produced by the parser,
    or an empty list when the parser yields none.
    """
    cfg = nltk.CFG.fromstring("""
    Q0 -> 'A' Q2 | 'B' Q2 | 'C' Q1 | 'A' | 'B'
    Q1 -> 'A' Q3 | 'B' Q3 | 'C' Q4 | 'A' | 'B' | 'C'
    Q2 -> 'A' Q1 | 'B' Q1 | 'C' Q5 
    Q3 -> 'A' Q4 | 'B' Q4 | 'C' Q2 | 'A' | 'B' | 'C'
    Q4 -> 'A' Q2 | 'B' Q2 | 'C' Q1 | 'A' | 'B' 
    Q5 -> 'A' Q4 | 'B' Q4 | 'C' Q2 | 'A' | 'B' | 'C'
    """)
    tokens = tokenize(s.strip())
    # Every parse is collected first; only the leading one is kept.
    all_trees = list(nltk.LeftCornerChartParser(cfg).parse(tokens))
    return all_trees[:1]
Example #7
0
File: v.py Project: nachos5/skoli
def ver18():
    """Print parse trees and a timing table for three NLTK chart parsers.

    For each of the top-down, bottom-up and left-corner chart parsers,
    and for each of three fixed sentences, this prints the parse tree via
    ``print_tree`` and records the value returned by ``timeit_parser``.
    The final table has one row per parser plus a closing totals row;
    every row ends with the sum of its timings.
    """
    grammar = get_grammar()
    # Header row of the results table.
    table = PrettyTable(
        ["Þáttari", "Setning 1", "Setning 2", "Setning 3", "Samtals"])

    # Three different test sentences.
    sents = [
        "þú ert hestur",
        "ég ætla að vera besta sítrónan",
        "banani og gúrka borða appelsínu og okkur",
    ]

    # The parsers under test, each paired with its display name.
    named_parsers = [
        ("TopDownChartParser", nltk.TopDownChartParser(grammar)),
        ("BottomUpChartParser", nltk.BottomUpChartParser(grammar)),
        ("LeftCornerChartParser", nltk.LeftCornerChartParser(grammar)),
    ]

    # Running total per sentence, accumulated across all parsers.
    sent_total_time = [0, 0, 0]
    for parser_name, parser_obj in named_parsers:
        # Unrounded total for this parser across all sentences.
        parser_total = 0
        row = [parser_name]
        for sent_no, sent in enumerate(sents, start=1):
            # Show the tree this parser builds for the sentence.
            print("{parser} þáttunartré fyrir setningu {i}:".format(
                parser=parser_name, i=sent_no))
            print_tree(parser_obj, sent, True)
            print()
            # Then time it; timeit_parser receives the parser's name,
            # not the parser object.
            elapsed = timeit_parser(parser_name, sent, 1000)
            parser_total += elapsed
            rounded = round(Decimal(elapsed), 6)
            sent_total_time[sent_no - 1] += rounded
            row.append(rounded)
        row.append(round(Decimal(parser_total), 6))
        table.add_row(row)

    # Closing row: per-sentence totals and the grand total.
    table.add_row(["Samtals"] + sent_total_time + [sum(sent_total_time)])
    print("Tafla:\n", table)
 def validateSentence(self):
     """Print whether each sentence in ``self.sent`` parses under ``self.grammar``.

     For every sentence, the characters ``! . : ,`` are removed, the rest
     is split with ``word_tokenize`` and tagged with ``nltk.pos_tag``. The
     resulting sequence of POS tags (not the words) is given to a
     left-corner chart parser built from ``self.grammar``.

     Prints "Valid sentence" when the parser yields at least one tree and
     "Invalid sentence" otherwise. After all sentences, prints the time
     elapsed since ``self.startTime``. Nothing is returned.

     NOTE(review): exceptions raised by the parser (for example for tags
     the grammar does not cover) are not caught here -- confirm whether
     such sentences should be reported as invalid instead.
     """
     # The parser depends only on the grammar, so build it once rather
     # than once per sentence.
     parser = nltk.LeftCornerChartParser(self.grammar)
     for s in self.sent:
         s = "".join(c for c in s if c not in ('!', '.', ':', ','))
         stoken = word_tokenize(s)
         # Tag once; the sentence was previously tagged twice and the
         # first result discarded.
         pos_tags = [pos for (_token, pos) in nltk.pos_tag(stoken)]
         # One tree is enough to call the sentence valid, so stop at the
         # first parse.
         if any(True for _tree in parser.parse(pos_tags)):
             print("Valid sentence")
         else:
             print("Invalid sentence")
     print("Total time taken:",(time.time()-self.startTime))
Example #9
0
CONJP -> "indicating" "that"
ADVP -> "markedly" "different"
N -> "We" | "different" "species" | "subspecies" | "use"
V -> "showed" | "distinguish" | "is" | "can"
CC -> "and" | "but"
IN -> "from" | "of"
ADJP -> "not" "arbitrary,"
""")

# This grammar produces a decent parse that runs quickly. To do: refine my
# knowledge of grammar and fix the CONJP structure, which is not a real tag.
grammar4 = nltk.CFG.fromstring("""
S -> NP VP NP | NP VP NP VP NP | S CONJP S
NP -> N | NP CC N | ADJ N | ADVP NP | NP PP | N N
VP -> V | V VP |VP CC VP | V ADJP | "to" V | "found" "that"
CONJP -> "indicating" "that"
PP -> P NP
ADVP -> ADV ADJ
ADJP -> ADJ | "not" ADJ
ADV -> "markedly"
ADJ -> "arbitrary," | "different" | "one"
CC -> "but" | "and"
N -> "We" | "subspecies" | "use" | "another." | "howl" | "types," | "modulation" | "species" | "population"
V -> "showed" | "is" | "can" | "be" | "used" | "distinguish"
P -> "of" | "from"
""")

# Print every parse of `sentence` under grammar4.
# NOTE(review): `sentence` is not defined in this part of the file --
# presumably a token list built earlier; confirm.
lc_parser = nltk.LeftCornerChartParser(grammar4)
for tree in lc_parser.parse(sentence):
    # Call form of print: the bare `print tree` statement is a
    # SyntaxError on Python 3, while print(tree) prints the same text on
    # Python 2 and 3.
    print(tree)