Code example #1
    def __init__(self):
        
        # comment about what each part of speech is:
        """ CC   - conjunction: or, but, and, either
            CD   - number: one, two, three
            DT   - determiner: a, an, the, both, all, these, any, some
            EX   - the word 'there'
            IN   - preposition: in, of, with, for, under, among, upon, at
            JJ   - adjective: certain, curious, little, golden, other, offended
            JJS  - adjective: -est : best, loveliest, largest
            JJR  - adjective: -er : larger, smaller, worse
            MD   - can, dare, should, will*, might, could, must
            NN   - common singular noun
            NNS  - common plural noun
            NNP  - proper singular noun
            NNPS - proper plural noun
            PDT  - all, both, quite, many, half
            PRP  - hers, her, himself, thy, us, it, I, him, you, they
            PRPP - possessive: his, mine, our, my, her, its, your
            RB   - adverb: very, not, here, there, first, just, down, again, beautifully, -ly
            RBR  - more
            RBS  - adverb superlative: -est
            RP   - participle: up, down, out, away, over, off
            TO   - the word 'to'
            UH   - interjection
            VB   - base form verb: to ___ 
            VBD  - past verb: -ed : was*(freq. occur), had, dipped, were, said, seemed
            VBG  - present verb: -ing: trembling, trying, getting, running, swimming
            VBN  - past verb descriptive: crowded, mutated, fallen, lit, lost, forgotten
            VBP  - present verb: not -s: am, wish, make, know, do, find
            VBZ  - present verb: -s : is*, has, seems
            WDT  - what, which, that*
            WP   - who, what
            WRB  - how, whenever, where, why, when
        """

        # create base of cfg
        g = CFG.fromstring("""
            S -> NPS VPS | NPS VPS | NPS VPS | NPP VPP | VPO | NPO
            S -> NPS VPS | NPP VPP | NPS VPS

            NPS -> 'DT' 'NN' | 'DT' 'NN' | 'DT' 'JJ' 'NN' | 'DT' 'JJ' 'NN'
            NPS -> 'EX' 'the' 'NN' | 'the' 'JJS' 'NN'
            NPS -> 'she' | 'he' | 'it' | 'I'
            NPS -> NPS INP | INP NPS

            NPP -> 'the' 'NNS' | 'the' 'NNS' | 'NNS'
            NPP -> 'the' 'JJ' 'NNS'
            NPP -> 'they' | 'you' | 'we'

            VING -> 'VBG' | 'VBG' | 'RB' 'VBG'
            VBB -> 'VB' | 'VB' | 'VBP' 

            SM -> 'is' | 'was' | 'has been'

            VPS -> SM 'VBN' | SM 'VBN' 'like the' 'JJ' 'NN'
            VPS -> SM VING | SM VING INP
            VPS -> SM VING 'like' 'DT' 'JJ' 'NN'
            VPS -> SM VING 'like a' 'NN' INP
            VPS -> SM 'as' 'JJ' 'as' 'JJ'
            VPS -> SM 'a' 'JJ' 'NN'
            VPS -> SM 'a' 'NN' INP
            VPS -> 'MD' 'have been' VING
            VPS -> 'is' 'JJ' 'and' 'JJ'
            VPS -> 'VBD' INP | 'RB' 'VBD'
            VPS -> SM 'VBD' 'like' 'DT' 'JJ' 'NN'
            VPS -> SM 'as' 'JJ' 'as the' 'NN'
            VPS -> 'VBD' 'NN' | 'VBD' 'DT' 'NN'
            VPS -> 'VBD' 'and' 'VBD' INP 'until' 'VBN'
            VPS -> VPS 'and' S
            VPS -> 'VBD' 'JJR' 'than' 'a' 'NN'
            VPS -> 'VBD' 'EX'
            VPS -> SM 'JJ' | SM 'VB' INP

            NPO -> 'a' 'NN' 'IN' 'NNP'
            NPO -> 'the' 'NN' 'IN' 'the' 'JJ' 'NNP'
            NPO -> 'the' 'NNS' 'IN' 'the' 'NN'

            VPO -> 'VBG' 'like' 'NNP' 'RP' 'DT' 'JJ' 'NN' 'IN' 'DT' 'NN'
            VPO -> 'has been' 'VBG' 'RP' 'and' 'VBG'
            
            PM -> 'are' | 'were' | 'have been'

            VPP -> PM VING | PM VING INP
            VPP -> PM VING 'like the' 'NNS' INP
            VPP -> PM 'as' 'JJ' 'as' NPS INP | PM 'JJ' 'like' 'NNS' | PM 'JJ' 'like' VBG 'NNS'
            VPP -> PM 'VBN' | PM 'VBN' INP
            VPP -> PM 'as' 'JJ' 'as' 'JJ' | PM 'as' 'JJ' 'as' 'VBG' 'NNS'
            VPP -> PM 'NNS' INP
            VPP -> PM 'JJ' 'NNS'
            VPP -> 'are' 'JJ' 'and' 'JJ'
            VPP -> 'VBD' INP | 'VBD' 'RP' INP
            VPP -> PM 'JJ' | PM 'VB' INP
            
            INP -> 'IN' 'DT' 'NN' | 'IN' 'the' 'NNS' | 'IN' 'the' 'JJ' 'NNS'
            INP -> 'IN' 'DT' 'NN' 'IN' 'DT' 'NN'
            INP -> 'IN' 'DT' 'JJ' 'NN' | 'RP' 'IN' 'DT' 'JJ' 'NN'
            INP -> 'RP' 'IN' 'DT' 'NN' | 'IN' 'JJ' 'NNS'
            INP -> 'IN' 'DT' 'NN' | 'RP' 'DT' 'NNS'
            """)

        # save grammar to self.cfg (re-parse the production lines, dropping the "Grammar with ..." header line)
        self.cfg = CFG.fromstring(str(g).split('\n')[1:])
        self.cfg._start = g.start()       
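Note that the terminals in this grammar are POS tags rather than words, so generating from it produces tag templates that some later stage presumably fills with vocabulary. A minimal standalone sketch of that behavior (toy grammar, not the project's rules):

from nltk import CFG
from nltk.parse.generate import generate

toy = CFG.fromstring("S -> 'DT' 'JJ' 'NN'")
for template in generate(toy, n=1):
    print(template)  # ['DT', 'JJ', 'NN'] -- a POS template to fill with words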
Code example #2
File: TranslatorSKLearn.py Project: Dinusha9999/FYP
def verifygrammar(label, codestring, varname):
    regexp_tagger = RegexpTagger([
        (r"^[0-9]+$", "decimal"),
        (r"^0x[0-9A-Fa-f]+$", "hexadecimal"),
    ])
    # VARIABLE LINE GENERATION - Assumption - Complex numbers data types are ignored for data mining algorithms
    if label.tag == 'var':
        # note: terminals must be quoted; unquoted symbols are read as nonterminals
        varGrammar = CFG.fromstring("""
            S -> VN "=" VV
            VN -> '""" + varname + """'
            VV -> I | D | ST | B
            B -> 'True' | 'False'
            I -> I N | N
            D -> I '.' F
            F -> F N | N
            ST -> "'" STI "'"
            STI -> STI N | STI C | N | C
            N -> '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
            C -> 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
            """)
    elif label.tag == 'array':
        arrayGrammar = CFG.fromstring("""
            S -> AN "= [" AE "]"
            AN -> '""" + varname + """'
            AE -> VV AE | VV
            VV -> I | D | ST | B
            B -> 'True' | 'False'
            I -> I N | N
            D -> I '.' F
            F -> F N | N
            ST -> "'" STI "'"
            STI -> STI N | STI C | N | C
            N -> '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
            C -> 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
            """)
Code example #3
File: parser.py Project: Roceso1337/FriendBot
def respondQuestion(sentence, keyWord, POS):
	if "Tell me" not in sentence:
		grammar = ""

		if POS == "NNPS" or POS == "NNS":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'they' | 'those'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'are' 
			""")
		elif POS == "NN" or POS == "NNP":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'it' | 'that'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'is' 
			""")

		rand_sent_list = []
		response = ""
		for sentence in generate(grammar):
			rand_sent_list.append(' '.join(sentence))
		while True:
			num = randint(0, len(rand_sent_list)-1)
			response = rand_sent_list[num]
			if "<>" in response and (POS == "NNS" or POS == "NN"):
				index = response.index("<>")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "[]" in response and (POS == "NNPS" or POS == "NNP"):
				index = response.index("[]")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "<>" not in response and "[]" not in response:
				break
		return response
	else:
		knowledgeRep(sentence)
Code example #4
def generate_pairs(depth, cfg):
    '''
    depth: integer for the depth of the parse tree in the CFG
    cfg: chosen grammar, 1, 2 or 3
    '''
    if (cfg == 1):
        grammar = CFG.fromstring("""
        S -> Y  
        Y ->   a Y b | a Y | a |
        a -> '(' ')'  
        b -> '{' '}'  
        """)
    elif cfg == 2:
        grammar = CFG.fromstring("""
        S ->  X | Y  | X Y
        X -> a
        Y ->  b
        a -> '(' a ')'  |  
        b -> '{' b '}'  | 
        """)
    elif cfg == 3:
        grammar = CFG.fromstring("""
        S ->  X 
        X -> a | b
        a -> '(' a ')'  |  
        b -> '{' b '}' | '{' a '}'
        """)
    trg = list(generate(grammar, depth=depth))
    trg_list = []
    for sentence in trg:
        k = ''.join(sentence)
        trg_list.append(k)

    src_list = trg2src(trg)

    if cfg == 1:
        A = list((s + 'A ' for s in src_list))
    elif cfg == 2:
        A = list((s + 'B ' for s in src_list))
    elif cfg == 3:
        A = list((s + 'C ' for s in src_list))
    else:
        raise ValueError('cfg must be 1, 2 or 3')

    B = list((s for s in trg_list))

    df = pd.concat([pd.Series(A), pd.Series(B)], axis=1)
    pairs = (df.iloc[:, 0] + df.iloc[:, 1]).values.tolist()
    return pairs
Code example #5
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
	S -> W TR SUB V '?' | WA TR SUB V DO '?' 
	W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
	WA -> 'when' | 'where' | 'why' | 'how'
	TR -> 'have'
	SUB -> PRO
	PRO -> 'they' |'you'
	V -> '%s'
	DO -> 'the %s'
	""" % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'ab') as csvwriter:
        writer = csv.writer(csvwriter)
        for sentence in generate(grammar1, n=999):
            # generate() yields token lists; join into a string before matching
            sentence = ' '.join(sentence)
            if sentence.find('who') == 0:
                multiplier = 1
            if sentence.find('what') == 0:
                multiplier = 1
            if sentence.find('when') == 0:
                multiplier = 2
            if sentence.find('where') == 0:
                multiplier = 2
            if sentence.find('why') == 0:
                multiplier = 4
            if sentence.find('how') == 0:
                multiplier = 4
            writer.writerow((sentence, multiplier * count))
Code example #6
File: cky.py Project: BabisK/M36209P
def main():
    parser = argparse.ArgumentParser(description='CKY and PCKY')
    parser.add_argument('-g',
                        '--grammar',
                        help='Input file name',
                        required=True)
    parser.add_argument('-s',
                        '--sentence',
                        help='Input sentence',
                        required=True)
    args = parser.parse_args()

    grammar_text = None
    with open(args.grammar, 'r') as f:
        grammar_text = f.read()

    grammar = None
    result = None
    try:
        grammar = CFG.fromstring(grammar_text)
    except ValueError:
        grammar = PCFG.fromstring(grammar_text)

    if type(grammar) is CFG:
        result = cky(args.sentence, grammar)
    elif type(grammar) is PCFG:
        result = pcky(args.sentence, grammar)
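The cky() and pcky() helpers are project code that is not shown here. For reference, a minimal CKY membership test over an NLTK grammar in Chomsky normal form could look like this (a sketch under that assumption, not this project's implementation):

from nltk import CFG

def cky_recognize(tokens, grammar):
    # table[i][j] holds the nonterminals that derive tokens[i:j]
    n = len(tokens)
    table = [[set() for _ in range(n + 1)] for _ in range(n + 1)]
    for i, tok in enumerate(tokens):
        for prod in grammar.productions():
            if prod.rhs() == (tok,):  # terminal rule A -> 'tok'
                table[i][i + 1].add(prod.lhs())
    for span in range(2, n + 1):
        for i in range(n - span + 1):
            j = i + span
            for k in range(i + 1, j):
                for prod in grammar.productions():
                    if len(prod.rhs()) == 2:  # binary rule A -> B C
                        b, c = prod.rhs()
                        if b in table[i][k] and c in table[k][j]:
                            table[i][j].add(prod.lhs())
    return grammar.start() in table[0][n]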
Code example #7
    def __init__(self,
                 cfg_grammar=None,
                 origin_file='save/origin.txt',
                 oracle_file='save/oracle.txt',
                 wi_dict='save/word_index_dict.json',
                 iw_dict='save/index_word_dict.json',
                 sequence_length=None,
                 generate_from_scratch=False):
        if cfg_grammar is None:
            cfg_grammar = """
              S -> S PLUS x | S SUB x |  S PROD x | S DIV x | x | '(' S ')'
              PLUS -> '+'
              SUB -> '-'
              PROD -> '*'
              DIV -> '/'
              x -> 'x' | 'y'
            """

        self.grammar = CFG.fromstring(cfg_grammar)
        self.origin_file = origin_file
        self.oracle_file = oracle_file
        self.wi_dict = wi_dict
        self.iw_dict = iw_dict
        self.sequence_length = sequence_length
        self.generate_from_scratch = generate_from_scratch
        self.vocab_size = None
        import os, inspect
        self.saving_path = os.path.dirname(
            os.path.abspath(inspect.getfile(
                inspect.currentframe()))) + '/save/'
        return
Code example #8
def rand_sentences(n=10, depth=6, wpt=0.25):
    #grammar = CFG.fromstring(open('assets/text/grammar.txt', 'r').read())
    grammar = CFG.fromstring(rand_vocabulary(wpt))
    sentences = list(generate(grammar, n=n * 20, depth=depth))
    return [
        ' '.join(i) for i in random.sample(sentences, min(n, len(sentences)))
    ]
Code example #9
def get_pos_tags(pos_tuples):
    """
    Returns the POS tags from POS tuples of (word, tag)
    Updates the grammar for unknown tags
    """

    global grammar_string
    global grammar
    global terminals

    changed_grammar = False
    pos_tags = []

    for pos_tuple in pos_tuples:
        tag = pos_tuple[1]

        if tag not in terminals:

            if tag == '\'\'':
                tag = 'APOS'

            grammar_string += ' | \'' + tag + '\''

            terminals[tag] = None
            changed_grammar = True

        pos_tags.append(tag)

    if changed_grammar:
        grammar = CFG.fromstring(grammar_string)

    return pos_tags
Code example #10
 def __init__(self, blackboard):
     super(SentenceExpert, self).__init__(blackboard, "Sentence Expert")
     self.eva = ["be", "look", "feel"]
     self.atv = ["like", "hate", "love", "know", "need", "see"]
     """ eva - emotional verb active
         evp - emotional verb passive
         ej - emotion adjective
         en - emotional noun
         atv - attitude verb
     """
     self.grammar = CFG.fromstring("""
         S -> P | EP | Person ATV NP
         P -> NP VP
         EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP
         ENP ->  EN OF NP
         NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN
         VP -> V | V ERB | ERB V
         Det -> 'the'
         N -> 'n'
         V -> 'v'
         EVA -> 'eva'
         EVP -> 'makes'
         EN -> 'en'
         EJ -> 'ej'
         JJ -> 'adj'
         ERB -> 'erb'
         ATV -> 'atv'
         Person -> 'person'
         Pron -> 'pron'
         OF -> 'of'
         CC -> 'and' | 'but' | 'because' | 'so'
         """)
Code example #11
File: syllable.py Project: mouse-reeve/langmaker
    def __init__(self, phonemes=None, onset=None, coda=None):
        self.phonemes = phonemes or Phoneme()

        # use CFG to structure syllables
        if onset is None:  # optional onset
            onset = 'C | C C | \' \''
        elif onset: # mandatory onset
            onset = 'C | C C'
        else: # no onset
            onset = '\' \''

        if coda is None:  # optional coda
            coda = 'C | \' \''
        elif coda: # mandatory coda
            coda = 'C'
        else: # no coda
            coda = '\' \''
        # nucleus is always present

        # based on the "typical model"
        grammar = '''
        S -> O V K
        O -> %s
        K -> %s
        C -> \'c\'
        V -> \'v\'
        ''' % (onset, coda)
        self.grammar = CFG.fromstring(grammar)
        self.syllables = self.generate_syllables()
Code example #12
File: Tweet_content1.py Project: BelhalK/twitterbot
def Tweet_content1():
  grammar = CFG.fromstring(demo_grammar)

  # generate up to 4 sentences (n bounds the count, not the depth)
  for sentence in generate(grammar, n=4):
    print(' '.join(sentence))
    return sentence  # returns the first generated sentence
Code example #13
def gen_grammar_plural(verb, direct_object, count):
	try:
		verb = en.verb.present_participle(verb)
	except KeyError:
		return
	if verb != "":
		g1 ="""
		S -> WA TR SUB V DO '?' | W TR SUB V '?' 
		W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
		WA -> 'when' | 'where' | 'why' | 'how'
		TR -> 'are' | 'were'
		SUB -> 'they' | 'you'
		V -> '%s'
		DO -> 'the %s'
		"""%(verb, direct_object)
		grammar1 = CFG.fromstring(g1)
		multiplier = 1
		with open('sentences.csv', 'ab') as csvwriter:
			writer = csv.writer(csvwriter)
			for sentence in generate(grammar1, n=999):
				sentence = ' '.join(sentence)
				if sentence.find('who') == 0:
					multiplier = 1
				if sentence.find('what') == 0:
					multiplier = 1
				if sentence.find('when') == 0:
					multiplier = 2
				if sentence.find('where') == 0:
					multiplier = 2
				if sentence.find('why') == 0:
					multiplier = 4
				if sentence.find('how') == 0:
					multiplier = 4
				writer.writerow((sentence, multiplier*count))  # already joined above
Code example #14
def gen_grammar3_past_plural(verb, direct_object, count):
	g1 ="""
	S -> W TR SUB V '?' | WA TR SUB V DO '?' 
	W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
	WA -> 'when' | 'where' | 'why' | 'how'
	TR -> 'have'
	SUB -> PRO
	PRO -> 'they' |'you'
	V -> '%s'
	DO -> 'the %s'
	"""%(verb, direct_object)
	grammar1 = CFG.fromstring(g1)
	multiplier = 0
	with open('sentences.csv', 'ab') as csvwriter:
		writer = csv.writer(csvwriter)
		for sentence in generate(grammar1, n=999):
			# generate() yields token lists; join into a string before matching
			sentence = ' '.join(sentence)
			if sentence.find('who') == 0:
				multiplier = 1
			if sentence.find('what') == 0:
				multiplier = 1
			if sentence.find('when') == 0:
				multiplier = 2
			if sentence.find('where') == 0:
				multiplier = 2
			if sentence.find('why') == 0:
				multiplier = 4
			if sentence.find('how') == 0:
				multiplier = 4
			writer.writerow((sentence, multiplier*count))
Code example #15
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
		S -> WA TR SUB V DO '?' | W TR SUB V '?' 
		W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
		WA -> 'when' | 'where' | 'why' | 'how'
		TR -> 'are' | 'were'
		SUB -> 'they' | 'you'
		V -> '%s'
		DO -> 'the %s'
		""" % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'ab') as csvwriter:
            writer = csv.writer(csvwriter)
            for sentence in generate(grammar1, n=999):
                sentence = ' '.join(sentence)
                if sentence.find('who') == 0:
                    multiplier = 1
                if sentence.find('what') == 0:
                    multiplier = 1
                if sentence.find('when') == 0:
                    multiplier = 2
                if sentence.find('where') == 0:
                    multiplier = 2
                if sentence.find('why') == 0:
                    multiplier = 4
                if sentence.find('how') == 0:
                    multiplier = 4
                    writer.writerow((sentence, multiplier * count))  # already joined above
Code example #16
def grammar_extraction(population_g, initial_state, subs):

    population_s = {}

    for pop in population_g:
        p = [initial_state]

        for n in population_g[pop]:

            if n != 0:
                p.append(n)

            else:
                pass

        separ = "\n"
        prime_grammar = separ.join(p)
        pre_grammar = prime_grammar.format(subs=subs)

        pos_grammar = """
    {seed}
    """.format(seed=prime_grammar)

        post_grammar = """
    {seed}
    """.format(seed=pre_grammar)
        grammar_use = CFG.fromstring(post_grammar)

        population_s[pop] = (grammar_use, pos_grammar)

    return population_s
Code example #17
def draw_1(s):
    m = s
    l = fool.cut(s)[0]
    print(l)
    p = product_grammar(m)
    grammar = CFG.fromstring("""
    S -> NP V NP U L | NP U NP V L | NP U L V NP | L U NP V NP | L V NP U NP | NP V L U NP
    NP -> N N | r NP | NP A NP | M Q NP | N | NP U NP | A U NP | N NP | NP C NP | NP U | M NP
    VP -> V | V NP | V VP | A VP | VP NP | VP U | VP C VP | VP P | VP uguo
    V -> v | vi | vshi
    N -> n | nr | t | ns | f | nx | nz
    R -> r
    C -> c
    P -> p
    L -> R | R NP
    U -> ude | y
    A -> a | d | ad
    M -> m
    Q -> q
    """ + p)
    cp = nltk.ChartParser(grammar)
    tree = cp.parse(l)
    stree = []
    for s in tree:
        st = []
        #s.draw()
        for i in range(len(s)):
            st.append([s[i].label(), ''.join(s[i].leaves())])
        stree.append(st)
    return stree
Code example #18
 def setUp(self):
     if not exists(self.LEXICON_FILE_NAME):
         self.skipTest("Unable to find file {} as lexicon".format(
             self.LEXICON_FILE_NAME))
     if not exists(self.GRAMMAR_FILE_NAME):
         self.skipTest("Unable to find file {} as grammar".format(
             self.GRAMMAR_FILE_NAME))
     assert exists(self.PARSE_TREES_FILE_NAME)
     
     valid,lexiconText = q1utils.sanitizeAndValidateLexicon(
         self.LEXICON_FILE_NAME)
     if not valid:
         self.skipTest("Lexicon {} is invalid.".format(
             self.LEXICON_FILE_NAME))
     
     valid,grammarText = q1utils.sanitizeAndValidateGrammar(
         self.GRAMMAR_FILE_NAME)
     if not valid:
         self.skipTest("Grammar {} is invalid.".format(
             self.GRAMMAR_FILE_NAME))
     
     allRules = grammarText + '\n' + lexiconText
     
     try:
         grammar = CFG.fromstring(allRules)
         self._parser = BottomUpChartParser(grammar)
     except Exception as e:
         self.skipTest(str(e))
Code example #19
def demo():
    """
    A demonstration of the recursive descent parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    for prod in grammar.productions():
        print(prod)

    sent = "I saw a man in the park".split()
    parser = parse.RecursiveDescentParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Code example #20
File: shiftreduce.py Project: yuanlanda/nltk
def demo():
    """
    A demonstration of the shift-reduce parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    sent = "I saw a man in the park".split()

    parser = parse.ShiftReduceParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Code example #21
File: grammarhelper.py Project: orestes1986/voz2b
def do_grammar_tests():
    from nltk import CFG
    grammar_files = [
        'grammar-mpropp.txt', 'grammar-mpropp2.txt', 'grammar-lakoff.txt',
        'grammar-gervas.txt', 'grammar-finlayson.txt'
    ]
    grammar_test = [
        i.split() for i in open('data/grammar-test-filtered.txt').readlines()
    ]
    for i in grammar_files:
        grammar_file = 'data/' + i
        print grammar_file, '\t',
        g = CFG.fromstring(open(grammar_file).read())
        #pprint.pprint(g.productions())
        coverage = True
        for tokens in grammar_test:
            try:
                g.check_coverage(tokens)
                print 1,
            except Exception as e:
                print 0,
                coverage = False
        print
        #rdp = nltk.RecursiveDescentParser(g)
        #srp = nltk.ShiftReduceParser(g)
        #bulccp = nltk.BottomUpLeftCornerChartParser(g)
        if coverage:
            for i, tokens in enumerate(grammar_test):
                pass
Code example #22
File: shiftreduce.py Project: prz3m/kind2anki
def demo():
    """
    A demonstration of the shift-reduce parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    sent = 'I saw a man in the park'.split()

    parser = parse.ShiftReduceParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Code example #23
File: sample.py Project: koboriakira/check-words
def execute(text: str):
    groucho_grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    parser = ChartParser(groucho_grammar)

    tokens = word_tokenize(text=SAMPLE_3)
    print(type(tokens))
    print(tokens)
    for tree in parser.parse(tokens=[
            'The',
            'little',
            'bear',
            'saw',
            'the',
            'fine',
            'fat',
            'trout',
            'in',
            'the',
            'brook',
    ]):
        print(tree)
Code example #24
File: OracleCfg.py Project: Leno1993/RecommendSystem
    def __init__(self,
                 cfg_grammar=None,
                 origin_file='save/origin.txt',
                 oracle_file='save/oracle.txt',
                 wi_dict='save/word_index_dict.json',
                 iw_dict='save/index_word_dict.json',
                 sequence_length=None):
        if cfg_grammar is None:
            cfg_grammar = """
              S -> S PLUS x | S SUB x |  S PROD x | S DIV x | x | '(' S ')'
              PLUS -> '+'
              SUB -> '-'
              PROD -> '*'
              DIV -> '/'
              x -> 'x' | 'y'
            """

        self.grammar = CFG.fromstring(cfg_grammar)
        self.origin_file = origin_file
        self.oracle_file = oracle_file
        self.wi_dict = wi_dict
        self.iw_dict = iw_dict
        self.sequence_length = sequence_length
        self.vocab_size = None
        return
Code example #25
def demo():
    """
    A demonstration of the recursive descent parser.
    """

    from nltk import parse, CFG

    grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)

    for prod in grammar.productions():
        print(prod)

    sent = 'I saw a man in the park'.split()
    parser = parse.RecursiveDescentParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)
Code example #26
def get_parser_for_grammar(input_code='program.gir', grammar_name='grammar'):
    terminal_rules = get_terminal_rules(read_lines(input_code))

    with open(grammar_name, 'r') as f:
        lines = '\n'.join([x for x in f.readlines() if x[0] != '#'])
        lines = lines + '\n' + '\n'.join(terminal_rules)
        return nltk.ChartParser(CFG.fromstring(lines))
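read_lines and get_terminal_rules are project helpers that are not shown. Conceptually, the latter must return CFG production strings mapping a terminal category to the tokens found in the input program; a speculative sketch (the rule name TOKEN is assumed):

def get_terminal_rules(lines):
    # one quoted terminal production per distinct token (rule name assumed)
    tokens = sorted({tok for line in lines for tok in line.split()})
    return ['TOKEN -> ' + ' | '.join("'%s'" % t for t in tokens)]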
Code example #27
def restore(sents, mint=None, maxt=None, minh=None, maxh=None):
    """Get best infered grammar

    Parameters
    ----------
    sents: collection of str
        sentences to use in restoration
    mint: int
        check up values of t starting from this value

    maxt: int
        check up values of t up to this value

    minh: int
        check up values of h starting from this value

    maxh: int
        check up values of h up to this value

    Returns
    -------
    grammar : nltk.CFG
    """
    res = restore_all(sents, mint, maxt, minh, maxh)
    simplest = min(res.values(), key=cmp_to_key(_cmp_grammar_simplicity))

    return CFG.fromstring(simplest)
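_cmp_grammar_simplicity is project code; since res.values() holds grammar strings, a plausible comparator simply prefers the grammar with fewer production lines (an assumption, not the original definition):

def _cmp_grammar_simplicity(g1_str, g2_str):
    # negative when g1_str is simpler (fewer productions) than g2_str
    return len(g1_str.splitlines()) - len(g2_str.splitlines())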
Code example #28
    def generate(self,
                 n=10,
                 verb='intransitive',
                 rc='none',
                 pp='none',
                 ident=False):
        """
            Generate input-output pairs with the main auxiliary in the given 
            language. Arguments specify whether the verb should be transitive 
            or intransitive, the position of the relative clause, and the 
            position of the prepositional phrase.
            The vocabulary used in this function is a random sample (class-wise)
            of the entire vocabulary, to allow for generating sentences in a 
            reasonable amount of time.

            Args:
                n: integer number of pairs to be generated
                verb: 'transitive' or 'intransitive', type of verb
                rc: 'none', 'subject', or 'object', position of relative clause
                pp: 'none', 'subject', or 'object', position of prepositional
                    phrase
                ident: boolean indicating whether output is identical sentence 
                    or question
            
            Return:
                list of tuples (input, output, main_aux)
        """

        grammar = CFG.fromstring(self.get_grammar_string(verb, rc, pp))

        sentences = list()
        for sentence in generate_from_cfg(grammar, n=n):
            sentences.append(Language.transform(sentence, ident))
        return sentences
Code example #29
File: main.py Project: Inthuch95/CS5012-p2-grammar
def context_free_grammar():
    cfg = CFG.fromstring("""\
    ################# Rules #################
    S -> NP VP
    S -> PP NP VP
    S -> Wh Aux NP VP 
    NP -> ProperNoun | CC ProperNoun | N | ProperNoun NP | AP N | DET NP | N PP    
    VP -> V | V NP | Adv VP | V NP VP
    AP -> Adj | Adj AP
    PP -> P NP | P NP VP
    
    ################# Lexicons ################# 
    N -> 'milk'| 'shoes' | 'salad' | 'kitchen' | 'midnight' | 'table'
    V -> 'laughs' | 'laughed' | 'drink' | 'wears' | 'serves' | 'drinks' | 'thinks' | 'wear'
    ProperNoun -> 'Bart' | 'Homer' | 'Lisa'
    Aux -> 'do' | 'does'
    CC -> 'and'
    Adj -> 'blue' | 'healthy' | 'green' 
    DET -> 'a' | 'the' 
    Adv -> 'always' | 'never' 
    P -> 'in' | 'before' | 'on' | 'when'
    Wh -> 'when'
    """)
    cfparser = ChartParser(cfg)
    sents = text.splitlines()
    for sent in sents:
        parses = cfparser.parse(sent.split())
        print(sent)
        for tree in parses:
            print(tree)
Code example #30
def perform_function(sentence):
    # print(sentence)
    output = ""
    g_string = (" SIGMA -> DELTA\n"
                " DELTA -> S P C|S P C A|S P A | S P \n"
                " A -> Pre Comp \n"
                " S -> h |m h\n"
                " C -> m h|h\n"
                " P -> n l|aux l| l \n"
                " m -> d e| d\n"
                " h -> " + name_string + "\n"
                " l -> 'boarded'|'cooked'|'climbed'|'bought'|'gave'\n"
                " Pre -> 'ni'\n"
                " e -> 'black'\n"
                " d -> 'the'|'The'\n"
                " aux -> 'n'")
    gramma = CFG.fromstring(g_string)
    parser = nltk.ChartParser(gramma)
    try:
        ans = parser.parse(sentence.split())
        output = " ".join(str(x) for x in list(ans))
    except ValueError as e:
        # print("error : " + str(e))
        output = "Error : " + str(e)
    return output
Code example #31
def main():
    source = "./grammar.cfg"

    sentences = [
        "skywalker sarà tuo apprendista",  #tuo apprendista skywalker sarà
        "tu avrai novecento anni di età",  # novecento anni di età tu avrai
        "tu hai amici lì",  # amici lì tu hai
        "noi siamo illuminati",  # illuminati noi siamo
        "il lato oscuro è arduo da vedere",  # arduo da vedere il lato oscuro è
        "tu hai molto da apprendere ancora",  # molto da apprendere ancora tu hai
        "skywalker corre veloce",  # veloce Skywalker corre
        "il futuro di questo ragazzo è nebuloso"
    ]  # nebuloso il futuro di questo ragazzo è

    with open(source, encoding='utf-8') as file:
        grammar = CFG.fromstring(file.read())
        #print(grammar)

    i = 0
    if grammar.is_chomsky_normal_form():
        for sent in sentences:
            it_tree = cky(sent.split(), grammar)
            if it_tree is not None:
                # only save/draw/translate when parsing succeeded
                save_tree("it" + str(i), it_tree)
                it_tree.draw()
                yoda_tree = translate_it_yo(it_tree)
                save_tree("yo" + str(i), yoda_tree)
                yoda_tree.draw()
            i += 1
    else:
        exit('Error: the grammar must be in Chomsky Normal Form')
Code example #32
 def generate_from_grammar(self, n, depth):
     grammar = CFG.fromstring(self.gramma)
     print("Generuje dla n " + n + " i depth " + depth)
     for track in generate(grammar, n=int(n), depth=int(depth)):
         self.track_array.append(' '.join(track))
         # productions
         numbers = " ".join(track)
         self.productions.append(numbers)
Code example #33
def restore_all(sents, mint=None, maxt=None, minh=None, maxh=None):
    """Get all infered grammars

    For all combinations of parameters `t` and `h` there may be a different grammar

    Grammar syntax example:

    S -> 'c' A 'a' B | 'b'

    A -> 'a' A | 'A'

    B -> 'b' A

    Parameters
    ----------
    sents: collection of str
        sentences to use in restoration
    mint: int
        check up values of t starting from this value

    maxt: int
        check up values of t up to this value

    minh: int
        check up values of h starting from this value

    maxh: int
        check up values of h up to this value

    Returns
    -------
    grammars : dict of str
        grammar strings for every valid pair of t and h
    """
    maxlen = len(max(sents, key=len))
    mint = mint if mint is not None else 1
    minh = minh if minh is not None else 1

    maxt = maxt if maxt is not None else maxlen
    maxh = maxh if maxh is not None else maxlen

    res = {}
    for t, h in itertools.product(range(mint, maxt + 1), range(minh, maxh + 1)):
        p = Pnet(sents)
        p = net_transform(p, t, h)
        _, g_str = net_to_grammar(p, t)

        g = CFG.fromstring(g_str)

        if all(check_grammar(g, s) for s in sents):
            print(f'Success with t={t}, h={h}')
            print(g_str, '\n')
            res[(t, h)] = g_str
        else:
            print(f'Fail with t={t}, h={h}')

    return res
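Pnet, net_transform, net_to_grammar and check_grammar are project code. A minimal check_grammar compatible with the call above could test parsability with an NLTK chart parser (a sketch; it assumes each sentence is a plain string split into single-character tokens to match single-character terminals):

from nltk import ChartParser

def check_grammar(grammar, sent):
    parser = ChartParser(grammar)
    try:
        return any(True for _ in parser.parse(list(sent)))
    except ValueError:
        # raised when some token is not covered by the grammar
        return False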
Code example #34
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--upper',
                        type=int,
                        required=True,
                        help='Model size upper bound')
    parser.add_argument('-d',
                        '--depth',
                        type=int,
                        required=True,
                        help='Maximum CFG production depth considered')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Show progress and timing')
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        required=True,
                        help='Path to text file containing CFG specification')
    parser.add_argument('-e',
                        '--expr',
                        type=str,
                        required=True,
                        help='Path quantifier expressions should'
                        ' be saved in')
    parser.add_argument('-b',
                        '--bit',
                        type=str,
                        required=True,
                        help='Path quantifier bitstrings should be saved in')

    args = parser.parse_args()

    upper = args.upper
    max_depth = args.depth
    verbose = args.verbose
    in_file = args.input
    expr_file = args.expr
    bit_file = args.bit

    with open(in_file, 'r') as f:
        grammar_str = f.read()

        # NLTK does not like unnecessary indentation
        pattern = re.compile(r'\n\s+\|')
        grammar_str = pattern.sub(' |', grammar_str)
        grammar = CFG.fromstring(grammar_str)

    qg = QuantifierGenerator(grammar, upper, max_depth, verbose)

    with open(expr_file, 'w') as f_expr:
        with open(bit_file, 'wb') as f_bit:
            for expr, q_str in qg.generate():
                f_expr.write(f'{expr}\n')
                f_bit.write(q_str.tobytes())
Code example #35
File: generate.py Project: dmtrek14/name-generation
def generate_name(G):
    grammar = CFG.fromstring(G)

    parser = ChartParser(grammar)

    gr = parser.grammar()
    tokens = produce(gr, gr.start())
    name = ''.join(tokens)
    return name.title()
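produce is a helper from this project that is not shown; a common implementation expands a symbol by recursively choosing random productions. A sketch written to match the call produce(gr, gr.start()) above (not the project's exact code):

import random
from nltk.grammar import Nonterminal

def produce(grammar, symbol):
    if not isinstance(symbol, Nonterminal):
        return [symbol]  # terminals come back as single tokens
    production = random.choice(grammar.productions(lhs=symbol))
    tokens = []
    for sym in production.rhs():
        tokens.extend(produce(grammar, sym))
    return tokens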
Code example #36
def get_ltl_grammar(symbols, env_name=''):
    if env_name == 'Craft':
        alphabet_str = ' | '.join(
            ["'" + a + "'" for a in symbols if 'C_' not in a])
        grammar_str = SIMPLER_CRAFT_LTL_GRAMMAR % alphabet_str
    else:
        alphabet_str = ' | '.join(["'" + a + "'" for a in symbols])
        grammar_str = LTL_GRAMMAR % alphabet_str
    grammar = CFG.fromstring(grammar_str)
    return grammar
Code example #37
File: nltk_grammar.py Project: husikl/lm-heuristic
 def from_cfg_file(cls, path: str, **kwargs) -> "CFGrammarNode":
     """
     :param path: path to file containing a context-free grammar
     :return: new Derivation tree node
     """
     assert os.path.exists(path)
     with open(path) as file:
         str_grammar = file.read()
     nltk_grammar = CFG.fromstring(str_grammar)
     return cls(nltk_grammar.start(), nltk_grammar, **kwargs)
Code example #38
    def __init__(self, grammar: str, depth: Optional[int] = None) -> None:
        """Load and initialize a context-free grammar.

        :param str grammar: A context-free grammar as a string, in NLTK's CFG notation
        """
        super().__init__()
        self.grammar = grammar
        self.cfg = CFG.fromstring(self.grammar)
        self.start = self.cfg.start()

        self._generator = self._get_generator(depth=depth)
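_get_generator is defined elsewhere in this class; a plausible minimal version (an assumption, not the original code) wraps nltk.parse.generate:

from nltk.parse.generate import generate

def _get_generator(self, depth=None):
    # yield token lists derived from the loaded CFG, optionally depth-bounded
    return generate(self.cfg, depth=depth)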
Code example #39
    def readfile(self, filename):

        print "Reading " + filename
        processed = 0
        with open(filename) as instream:
            for line in instream:
                processed += self.processline(line)
                if processed > 0 and CorpusGenerator.debug:
                    break
        self.grammar = CFG.fromstring(self.grammarstring)
        self.start = self.grammar.start()
Code example #40
def chart_parse(in_file, grammar_file, out_file):
    text = unicode(open(in_file, 'r').read(), errors='ignore')
    output = open(out_file, 'w')
    grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore')
    try:
        grammar = CFG.fromstring(grammar_string)
        parser = nltk.ChartParser(grammar)
        sentences = nltk.sent_tokenize(text)
        for sentence in sentences:
            words = nltk.word_tokenize(sentence)
            tree = parser.parse(words)
            for item in tree:
                output.write(str(item))
                output.write('\n')
    except Exception, e:
        message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e)
        sys.stderr.write(message)
        sys.exit()
Code example #41
File: chapter8.py Project: hbdhj/python
def someGrammaticalDilemmas():
    print "page 292 8.1  Some Grammatical Dilemmas"
    print "=============== Linguistic Data and Unlimited Possibilities ==============="
    from nltk import CFG
    groucho_grammar = CFG.fromstring(""" 
        S -> NP VP
        PP -> P NP
        NP -> Det N | Det N PP | 'I' 
        VP -> V NP | VP PP 
        Det -> 'an' | 'my' 
        N -> 'elephant' | 'pajamas' 
        V -> 'shot'
        P -> 'in'
        """)
    sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
    parser = nltk.ChartParser(groucho_grammar)
    trees = parser.nbest_parse(sent)
    for tree in trees:
        print tree
Code example #43
File: OracleCfg.py Project: IshJ/Texygen
    def __init__(self, cfg_grammar=None, origin_file='save/origin.txt', oracle_file='save/oracle.txt',
                 wi_dict='save/word_index_dict.json', iw_dict='save/index_word_dict.json',
                 sequence_length=None):
        if cfg_grammar is None:
            cfg_grammar = """
              S -> S PLUS x | S SUB x |  S PROD x | S DIV x | x | '(' S ')'
              PLUS -> '+'
              SUB -> '-'
              PROD -> '*'
              DIV -> '/'
              x -> 'x' | 'y'
            """

        self.grammar = CFG.fromstring(cfg_grammar)
        self.origin_file = origin_file
        self.oracle_file = oracle_file
        self.wi_dict = wi_dict
        self.iw_dict = iw_dict
        self.sequence_length = sequence_length
        self.vocab_size = None
        return
Code example #44
def CFG_grammar():
    GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP,NP,O = nonterminals('VP,NP,O')

    # Build a CFG based on the symbols that generated above.
    grammar = CFG.fromstring("""
    VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
    NP -> P ENTITY_PLACE | ENTITY_PLACE
    GOAL_FIND -> 'find'
    GOAL_FIND  -> 'show'
    GOAL_FIND  -> 'tell'
    O -> 'me'
    P -> 'in'
    ENTITY_PLACE -> 'starbucks'
    ENTITY_PLACE -> 'the starbucks'
    ENTITY_PLACE -> 'a starbucks'
    ENTITY_PLACE -> 'coffee bean'
    ENTITY_PLACE -> 'the coffee bean'
    ENTITY_PLACE -> 'a coffee bean'

    """)
    return grammar
Code example #45
File: hw8.py Project: caiqizhe/Archive
	def load_grammar( self ):
		s = open( self.name + '.cfg' ).read()
		self.grammar = CFG.fromstring(s)
		return
Code example #46
import nltk
from nltk import CFG

grammar = CFG.fromstring("""
	S -> NP VP
	NP -> Det Noun | Noun Adj
	VP -> Verb NP
	Det -> 'el'
	Noun -> 'gato' | 'pescado'
	Verb -> 'come'
	Adj -> 'crudo'
	""")

def dibujo_arbol(texto):
	sent = texto.split()
	parser = nltk.ChartParser(grammar)
	for tree in parser.parse(sent):
		print(tree)
		tree.draw()


dibujo_arbol('el gato come pescado crudo')
dibujo_arbol('gato crudo come el gato')
dibujo_arbol('el pescado come gato crudo')
	

Code example #47
from nltk import CFG
from nltk import parse
from nltk import Tree

grammar = CFG.fromstring('''
   S     -> WHO QP QM | WHICH Nom QP QM
   QP    -> VP | DO NP T
   VP    -> I | T NP | BE A | BE NP | VP AND VP
   NP    -> P | AR Nom | Nom
   Nom   -> AN | AN Rel
   AN    -> N | A AN
   Rel   -> WHO VP | NP T
   N     -> "Ns" | "Np"
   I    -> "Is" | "Ip"
   T    -> "Ts" | "Tp"
   A     -> "A"
   P     -> "P"
   BE    -> "BEs" | "BEp"
   DO    -> "DOs" | "DOp"
   AR    -> "AR"
   WHO   -> "WHO"
   WHICH -> "WHICH"
   AND   -> "AND"
   QM    -> "?"
   ''')

chartpsr = parse.ChartParser(grammar)

def all_parses(wlist,lx):
    """returns all possible parse trees for all possible taggings of wlist"""
Code example #48
File: simple_sds.py Project: EMCSlabs/Programs
# Tokenize the sentence.
tokenized = word_tokenize(words)

# Build the grammar for parsing.
GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
usr_goal = ENTITY_PLACE
usr_find = GOAL_FIND
VP,NP,O = nonterminals('VP,NP,O')

grammar = CFG.fromstring("""
VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
NP -> P ENTITY_PLACE | ENTITY_PLACE
GOAL_FIND -> 'find'
GOAL_FIND  -> 'show'
GOAL_FIND  -> 'tell'
O -> 'me'
P -> 'in'
ENTITY_PLACE -> 'starbucks'
ENTITY_PLACE -> 'Starbucks'
ENTITY_PLACE -> 'Coffee Bean'
ENTITY_PLACE -> 'Coffeebean'

""")
rd_parser = RecursiveDescentParser(grammar)

# Parsing the sentence.
parsed_words = []
for parsing in rd_parser.parse(tokenized):
    print(parsing)

# Find GOAL and ENTITY
for detect in parsing:
Code example #49
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
            
            # quote each word so NLTK reads it as a terminal, not a nonterminal
            if 'NN' in pos_words:
                grammar += 'N -> ' + ' | '.join("'%s'" % w for w in pos_words['NN']) + '\n'
            
            if 'VB' in pos_words:
                grammar += 'V -> ' + ' | '.join("'%s'" % w for w in pos_words['VB']) + '\n'
                
            if 'JJ' in pos_words:
                grammar += 'A -> ' + ' | '.join("'%s'" % w for w in pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #simple_grammar.start()
            simple_grammar.productions()
            
            sentences = []
            for sentence in generate(simple_grammar, n=10):
                sentences.append(' '.join(sentence))
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            


            caption = 'this is a caption'
            story = 'this is the story'
            
Code example #50
''' Generate horoscopes '''
import logging
from nltk.grammar import Nonterminal
from nltk import CFG
from os import path
import random
import re

HERE = path.abspath(path.dirname(__file__))

try:
    GRAMMAR = CFG.fromstring(open('%s/data/grammar.txt' % HERE).read())
except IOError:
    logging.error('Unable to load grammar file')
    raise IOError

def get_sentence(start=None, depth=7):
    ''' follow the grammatical patterns to generate a random sentence '''
    if not GRAMMAR:
        return 'Please set a GRAMMAR file'

    start = start if start else GRAMMAR.start()

    if isinstance(start, Nonterminal):
        productions = GRAMMAR.productions(start)
        if not depth:
            # time to break the cycle: keep only productions with no nonterminals left
            terminals = [p for p in productions
                         if not any(isinstance(sym, Nonterminal) for sym in p.rhs())]
            if len(terminals):
                productions = terminals
        production = random.choice(productions)
Code example #51

from nltk.parse.generate import generate #, demo_grammar
from nltk import CFG


demo_grammar = """
  S -> NP VP
  NP -> Det N
  PP -> P NP
  VP -> 'slept' | 'saw' NP | 'walked' PP
  Det -> 'the' | 'a'
  N -> 'man' | 'park' | 'dog'
  P -> 'in' | 'with'
"""
grammar = CFG.fromstring(demo_grammar)
print(grammar)


#Join words and generate based off of grammar - for n 
for sentence in generate(grammar, n=12):
    print(' '.join(sentence))

'''
Notes: 
Need to symbolize the grammar
Have the machine process the language
Need to integrate with Markov chain - file 'agiliq-markov.py'
'''
for sentence in generate(grammar, depth=4):
    print(' '.join(sentence))
Code example #52
File: assistant.py Project: ActionSeeker/Chatbot
def main():
	while True:

		print("Enter a statement")
		statement = raw_input().strip()
		if statement == '':
			continue
		if statement.lower() in ['bye','goodbye','tata','good-bye']:
			print("Good-bye, dear human")
			exit()
		userNameLoader() #loads the username

		tagged_arr = Viterbi(statement)

		tokens = word_tokenize(statement)

		isFile = False
		isDir = False

		#check if all of the elements are same
		count = 1
		tag = tagged_arr[1]
		for i in range(2,len(tagged_arr)):
			if tagged_arr[i] == tag:
				count = count + 1
		
		if count == len(tagged_arr)-1:
			n = len(tokens)
			for i in range(0,n):				
				tag_temp = Viterbi(tokens[i])[1]
				tagged_arr[i+1] = tag_temp

		for i in range(0,len(tokens)):
			if i+2 <= len(tokens):
				if tokens[i] in ['folder','file','directory'] and tagged_arr[i+2] in ['VB','VBN']:
					tagged_arr[i+1] = 'NN'
			elif tokens[i] in ['folder','file','directory'] and tagged_arr[i] in ['VB','VBN']:
					tagged_arr[i+1]='NN'

		for i in range (0,len(tokens)):
			if tagged_arr[i+1] in ['NN','NNS','NP','VB','AN','JJ'] and tokens[i]!= 'open':
				for j in range(0,len(appnames)):
					if tokens[i].lower() in appnames[j] and tokens[i].lower() not in ['file','folder','directory','copy','videos','desktop']:
						tagged_arr[i+1]='AN'
						tokens[i] = commands[j]
						isFile = True
						break
				if isDirName(userName,tokens[i])==True:
						tagged_arr[i+1] = 'AN'
						isDir = True
				elif isFileName(userName,tokens[i])==True:
						tagged_arr[i+1] = 'AN'
						isFile = True

		for i in range (0,len(tokens)):
			if tokens[i] in verbList:
				tagged_arr[i+1] = 'VB'
				break
			elif tokens[i] in ['words','lines']:
				tagged_arr[i+1] = 'NNS'
				break				
		
		#print(tagged_arr)

		grammar_string = """
		  S -> NPP VP
		  S -> VP
		  NPP -> MODAL PRONOUN | NOUN VA | APPNAME
		  NPP -> DET FOLDER VERB NAME | FOLDER VERB NAME| FOLDER NAME | DET NAME
		  NPP -> DET JJ FOLDER VERB NAME | JJ FOLDER VERB NAME| JJ FOLDER NAME
		  NPP -> DET AN FOLDER VERB NAME | AN FOLDER VERB NAME| AN FOLDER NAME
		  NPP -> DET APPNAME
		  NPP -> BACK TONAME | DET BACK TONAME
		  NPP -> WQUERY
		  WQUERY -> WQL AP NOUN | WRB AP NOUN
		  BACK -> 'background' | 'BACKGROUND' | 'Background'
		  BACK -> 'wallpaper' | 'WALLPAPER' | 'Wallpaper'
		  BACK -> AN
		  TONAME -> TO FILENAME | TO DET FILENAME
		  CPY -> DET FILENAME SOURCE DESTINATION | DET FILENAME DESTINATION SOURCE
		  CPY -> FILENAME SOURCE DESTINATION | FILENAME DESTINATION SOURCE
		  SOURCE -> IN SOURCER
	     	  SOURCER -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME
		  SOURCER -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME
		  DESTINATION -> TO DESTINATIONR
		  DESTINATIONR -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME 
		  DESTINATIONR -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME
		  FOLDER -> 'folder'|'directory'|'file'|'Folder'|'File'|'Directory'|'FOLDER'|'FILE'|'DIRECTORY'
		  FOLDER -> NN
		  VP -> VERB NPP | VERB VP | ADVERB VP | VERB CPY
		  VP -> BER RB IN PPS
		  PPS -> DET PP | PP
		  PP -> JJ NOUN | NOUN | FOLDER VBN DET FILENAME | FOLDER VBN FILENAME | FOLDER FILENAME | FOLDER DET FILENAME 
		  PP -> FILENAME
		  MODAL -> MD
		  PRONOUN -> PPSS | PPO
		  VA -> VERB APPNAME
                  APPNAME -> AN
  		  VERB -> VB | VBN
		  ADVERB -> RB
		  DET -> AT
		  NOUN -> NN | NP | NNS
		  FILENAME -> AN
		  """
		
		# avoid shadowing the built-in str
		name_rule = 'NAME -> '
		for i in range(1,len(tagged_arr)):
			name_rule += tagged_arr[i]
			if i < len(tagged_arr)-1:
				name_rule += " | "

		name_rule += "\n"

		grammar_string += name_rule

		#add POS tags
		tl = len(tagged_arr)
		for i in range(1,tl):
			if tokens[i-1] not in ['file','folder','directory']:
				grammar_string+=tagged_arr[i]+" -> \'"+tokens[i-1]+"\'\n"

		simple_grammar = CFG.fromstring(grammar_string)
		#print(simple_grammar)

		parser = nltk.ChartParser(simple_grammar)

		json_str = ''
	
		ANs= []
		ANJSON = []
		VBs = []
		VBJSON = []
		NAMEs= []
		NJSON = []
		CCYs = []
		SOURCEs = []
		DESTs = []
		FILENAMEs = []
		TONAMEs = []
		TONAMEFILEs = []
		PPs = []
		PPANs = []
		WQUERY = []
		OBJ = []

		for tree in parser.parse(tokens):
			#print(tree)
			ANs = list(tree.subtrees(filter=lambda x: x.label()=='AN'))
			VBs = list(tree.subtrees(filter=lambda x: x.label()=='VERB'))
			NAMEs = list(tree.subtrees(filter=lambda x: x.label()=='NAME'))
			CCYs = list(tree.subtrees(filter=lambda x:x.label()=='CPY'))
			SOURCEs = list(tree.subtrees(filter=lambda x:x.label()=='SOURCER'))
			SOURCEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), SOURCEs)
			DESTs = list(tree.subtrees(filter = lambda x:x.label()=='DESTINATIONR'))
			DESTs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), DESTs)
			FILENAMEs = list(tree.subtrees(filter = lambda x:x.label()=='FILENAME'))
			FILENAMEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), FILENAMEs)
			TONAMEs = list(tree.subtrees(filter=lambda x:x.label()=='TONAME'))
			TONAMEFILEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), TONAMEs)
			PPs = list(tree.subtrees(filter = lambda x:x.label()=='PP'))
			PPANs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), PPs)
			WQUERY = list(tree.subtrees(filter = lambda x:x.label()=='WQUERY'))
			OBJ = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='NOUN')), WQUERY)

		if(len(PPANs)>0):
			PPANs = PPANs[0][0]
			PPANs = tree2json(PPANs)
			OBJ = tree2json(OBJ[0][0])
			obj = OBJ['NOUN'][0]
			nounArr = ['NNS','NP','NN']
			for n in nounArr:
				if n in obj:
					obj = obj[n]
					break
			obj = obj[0]
			counter(PPANs['AN'][0],obj)

		for i in xrange(0,len(ANs)):
			ANJSON.append(tree2json(ANs[i]))

		for i in xrange(0,len(VBs)):
			VBJSON.append(tree2json(VBs[i]))

		for i in xrange(0,len(NAMEs)):
			NJSON.append(tree2json(NAMEs[i]))

		for i in xrange(0,len(VBs)):
			verbRoot = VBJSON[i]['VERB']
			if 'VB' in verbRoot[0]:
				if verbRoot[0]['VB'][0] in ['open','close','shut','exit']:
					if isFile == True:
						actionSequence(verbRoot[0]['VB'][0],ANJSON,True)
					elif isDir == True:
						actionSequence(verbRoot[0]['VB'][0],ANJSON,False)
				elif verbRoot[0]['VB'][0] in ['make','create']:
					#if isDir == True:
					createSequence(verbRoot[0]['VB'][0],NJSON,name_rule.rstrip('\n'))
				elif verbRoot[0]['VB'][0] in ['copy','cut','move','duplicate']:
					SOURCEs = tree2json(SOURCEs[0][0])
					DESTs = tree2json(DESTs[0][0])
					FILENAMEs = tree2json(FILENAMEs[0][0])
					cutCopy(verbRoot[0]['VB'][0],FILENAMEs,SOURCEs,DESTs)
				elif verbRoot[0]['VB'][0] in ['change','replace']:
					changeWallpaper(verbRoot[0]['VB'][0],tree2json(TONAMEFILEs[0][0]))
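tree2json is a project helper that is not shown; judging from accesses like VBJSON[i]['VERB'][0]['VB'][0], it converts an nltk.Tree into nested dicts keyed by node label. A sketch under that assumption:

from nltk import Tree

def tree2json(tree):
    # e.g. {'VERB': [{'VB': ['open']}]} for the tree (VERB (VB open))
    return {tree.label(): [tree2json(child) if isinstance(child, Tree) else child
                           for child in tree]}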
Code example #53
 def __init__(self, grammar):
     self.grammar = nltkCFG.fromstring(grammar)
Code example #54
File: generate.py Project: NatalieBlack/haiku_haiku
with_blank_spaces = ' '


############################################
############################################
############################################
def choose_line(some_lines):#5
    return a_random.choice(#7
                    some_lines).lower() #5

############################################

############################################
choose = choose_line #5

g = G.fromstring(#7
                    this_is_the_grammar) #5
############################################

############################################
while not len(pentas):#5
    for poem in generate(g, #7
                           start=N('five')): #5
############################################

############################################
      pentas.append(#5
                    with_blank_spaces.join(poem))#7

fives = pentas #5
############################################
Code example #55
File: rd.py Project: 5aurabhpathak/all-I-ve-done
#!/bin/env python3.5
from nltk import RecursiveDescentParser, CFG, pos_tag, word_tokenize
from nltk.draw.tree import TreeView
from os import system, remove

rdparser = RecursiveDescentParser(CFG.fromstring("""S -> NP VP
    PP -> P | P NP | P VP
    NP -> Det NP PP1 | Adj N PP1 | N PP1 | N NP PP1
    PP1 -> PP PP1 | 
    VP -> V NP PP1 | V PP1
    Det -> 'DT'
    N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$'
    V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG'
    Adj -> 'JJ'
    P -> 'IN'"""))

taggedsent = pos_tag(word_tokenize(''.join(c for c in input('Enter a sentence:') if c not in ':,;."')))
j = 1
for tree in rdparser.parse([x[1] for x in taggedsent]):
    i = iter(taggedsent)
    for s in tree.subtrees():
        if len(s) == 1: s[0] = next(i)[0]
    tv = TreeView(tree)
    tv._size.set(18)
    tv.resize()
    tv._cframe.canvas()['scrollregion'] = (0, 0, 1000,500)
    tv._cframe.print_to_file('output'+str(j)+'.ps')
    if system('convert output'+str(j)+'.ps -alpha off output'+str(j)+'.png') != 0:
       print(tree)
    remove('output'+str(j)+'.ps')
    j += 1
Code example #56
def output(request):
    # Validation of form
    if request.method == "POST":
        # Validation of request
        if 'inputURL' in request.POST:
            # Validation of image url
            imageURL = request.POST.get('inputURL')
            image_output = imageURL
            indexOfDot = imageURL.rfind(".")
            if indexOfDot == -1:
                return fail(request) # not an image URL
            indexOfDot += 1
            extension = imageURL[indexOfDot:]
            if extension != 'jpg' and extension != 'jpeg' and extension != 'png':
                return fail(request) # not a valid image (jpg, jpeg, png)
                
            client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6'
            secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN'
                
            clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set.
            try:
                result = clarifai_api.tag_image_urls(imageURL)
            except ApiError:
                #return fail(request)
                
                messages.add_message(request, messages.INFO, "ApiError")
                return HttpResponseRedirect('makestory/fail.html')
            
            
            class_list = result['results'][0]['result']['tag']['classes']
            prob_list = result['results'][0]['result']['tag']['probs']
            
            class_str = ""
            for i in range(0, len(class_list)):
                class_str += class_list[i] + " " 
            
            # currently just the list of matched words
            text_output = class_list.__str__()
            
            # Parts of speech recognition
            tokens = nltk.word_tokenize(class_str)
            dictionary = PyDictionary()
            
            
            
            nouns = []
            verbs = []
            adjectives = []
            otherPos = []
            for word in tokens:
                definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4
                assignment = definition.keys()[0] # Get the part of speech from the dictonary
                
                # assignment = tuple[1]
                
                if assignment == 'Noun':
                    nouns.append(word)
                elif assignment == 'Verb':
                    verbs.append(word)
                elif assignment == 'Adjective':
                    adjectives.append(word)
                else:
                    otherPos.append(word)
                    
                    
            # Create the grammar
            #P:prepositions, DET:articles, adverbs
            P = ["on","in","at","since","for","ago","before","to","past","to","until","by","in","at","on","under","below","over","above","into","from","of","on","at"]
            DET = ["the","a","one","some","few","a few","the few","some"]
            
            assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc.
            
            pos_tags = []
            pos_words = {}
            for tuple in assignments:
                word = tuple[0]
                pos = tuple[1]
                if pos in pos_words:
                    pos_words[pos].append(word)
                else:
                    pos_words[pos] = []
                pos_tags.append(pos)
                
                
            
            
            grammar = """
            S -> NP VP
            PP -> P NP
            NP -> Det N | Det N PP
            VP -> V NP | VP PP
            Det -> 'DT'
            """
            # N -> 'NN'
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
            
            # quote each word so NLTK reads it as a terminal, not a nonterminal
            if 'NN' in pos_words:
                grammar += 'N -> ' + ' | '.join("'%s'" % w for w in pos_words['NN']) + '\n'
            
            if 'VB' in pos_words:
                grammar += 'V -> ' + ' | '.join("'%s'" % w for w in pos_words['VB']) + '\n'
                
            if 'JJ' in pos_words:
                grammar += 'A -> ' + ' | '.join("'%s'" % w for w in pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #simple_grammar.start()
            simple_grammar.productions()
            
            sentences = []
            for sentence in generate(simple_grammar, n=10):
                sentences.append(' '.join(sentence))
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            


            caption = 'this is a caption'
            story = 'this is the story'
            
            return render(request, 'makestory/output.html',
                {
                'nouns_output': nouns,
                'verbs_output': verbs,
                'adjectives_output': adjectives,
                'otherPos_output': otherPos,
                'imageURL_output': imageURL,
                'caption_output': caption,
                'story_output': story,
                'sentences_test_output': sentences,
                }
            )
Code example #57
regex = re.compile(r"(\w+\s*),(\s*\w+\s*)(,|(and))+(\s*(and)?\s*\w+)")
#reg = re.compile("\((,|!|\?)\)\1")
#regex2 = re.compile("\((,|!|\?)\)(\s*\w+\s*)+\1")
#regex2 = re.compile("\(,\)(\s*\w+\s*)+\1")
regex2 = re.compile(r",(\s*\w+\s*)+,")
#regex3 = re.compile("!(\s*\w+\s*)+!")
#regex3 = re.compile("\((\s*\w+\s*)+\)(\s*\w+\s*)*\((,|!|\?)\)\1(\s*\w+\s*)*\2\1(\s*\w+\s*)*\2?")
#regex4 = re.compile("(\s*\w+\s*)*\((\s*\w+\s*)+\)\((,|!|\?)\)(\s*\w+\s*)*\1\2(\s*\w+\s*)*\1\2?")
#triple_to_dist = {}
list_reg = re.compile(r"(\w|\s)\s*\)")
grammar1 = CFG.fromstring("""
   S -> NP VP
   PP -> P NP
   NP -> Det N | Det N PP | Det A N | A N | N PP | "PRP$" N | N | "PRP$" A N | A N PP | N A PP | NP CC NP | NP NP NP | NP NP CC NP
   VP -> V NP | VP NP | VP PP | AV V | AV V NP | V AV | V AV NP | VP PP | V | VP CC VP | VP VP VP | VP VP CC VP
   Det -> "DT"
   V -> "VBZ" | "VB" | "VBG" | "VBN" | "VBD" | "VBP"
   P -> "PP" | "IN"
   A -> "JJ" | "JJR" | "JJS"
   AV -> "RB" | "RBR" | "RBS"
   N -> "NN" | "NNS" | "NNP" | "NNPS" | "PRP" | "CD"
  """)
parser1 = nltk.ChartParser(grammar1)
grammar2 = CFG.fromstring("""
   T -> S S S
   S -> NP VP
   PP -> P NP
   NP -> Det N | Det N PP | Det A N | A N | N PP | "PRP$" N | N | "PRP$" A N | A N PP | N A PP | NP CC NP | NP NP NP | NP NP CC NP
   VP -> V NP | VP NP | VP PP | AV V | AV V NP | V AV | V AV NP | VP PP | V | VP CC VP | VP VP VP | VP VP CC VP
   Det -> "DT"
   V -> "VBZ" | "VB" | "VBG" | "VBN" | "VBD" | "VBP"
   P -> "PP" | "IN"
Code example #58
# Filter each sentence and return the first filtered result.
def eliminate(sentence):
    sents = nltk.sent_tokenize(sentence)
    for sent in sents:
        return filter(sent)

# Here `choice` is the option selected on the UI (renamed to avoid shadowing the built-in input).
# Each question has an ID as per the NCERT book; `choice` holds the selected value.
choice = 26
# Generate variations of a particular question based on the choice and its corresponding grammar.
if choice == 2:
    g=CFG.fromstring(g1)
    g2=CFG.fromstring(g2)
    rd_parser=nltk.RecursiveDescentParser(g)
    for sent,sent2 in zip(generate(g2,n=100),generate(g,n=100)):
        newsent1=' '.join(sent)
        newsent2=' '.join(sent2)
        ans1=eliminate(newsent1)
        ans2=eliminate(newsent2)
        if(ans1 == None or ans2 == None):
            pass
        else:
            print(ans1)
            print(ans2)
            print("Determine the length and breadth")
            print("\n")
elif choice == 4:
Code example #59
#!/bin/env python3.5
from nltk import RecursiveDescentParser, pos_tag, CFG, Tree
from nltk.parse.earleychart import EarleyChartParser
from nltk.draw import TreeView
from os import system, remove

grammar1  = CFG.fromstring("""S -> NP VP
    PP -> P | P NP | P VP
    NP -> Det NP PP1 | Adj N PP1 | N PP1 | N NP PP1
    PP1 -> PP PP1 | 
    VP -> V NP PP1 | V PP1
    Det -> 'DT'
    N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$'
    V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG'
    Adj -> 'JJ'
    P -> 'IN'""")

grammar2 = CFG.fromstring("""S -> NP VP
        PP -> P | PP NP | PP VP
        NP -> Det NP | Adj NP | N NP | NP PP | N
        VP -> VP NP | VP PP | V
        Det -> 'DT'
        N -> 'NN' | 'NNS' | 'NNPS' | 'NNP' | 'PRP' | 'PRP$'
        V -> 'VBZ' | 'VBD' | 'VBP' | 'VBG'
        Adj -> 'JJ'
        P -> 'IN'""")

grammar = grammar1

rdparser, earlyparser = RecursiveDescentParser(grammar), EarleyChartParser(grammar)