def generate_phrase(self, pool):
    """Generate a comparison phrase from the grammar, substituting an
    adjective, a noun drawn from that adjective's comparisons, and a
    determiner into the produced token list.

    Returns the phrase as a list of Word objects, or None when any step
    fails (callers treat None as "no phrase available").
    """
    try:
        adj = choice(list(pool.adjectives))
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        noun = choice(list(pool.comparisons[adj]))
        # A plural noun takes "the"; otherwise use the first token of the
        # article phrase the `en` library picks (e.g. "a"/"an").
        if en.noun.plural(noun.name) == noun.name:
            article = "the"
        else:
            article = en.noun.article(noun.name).split(" ")[0]
        replace_words = {'adj': adj, 'n': noun, 'det': article}
        for pos in replace_words:
            while pos in phrase:
                try:
                    phrase = self.replace_pos(pos, replace_words[pos], phrase)
                except Exception:
                    # Substitution failed; abandon this phrase.
                    return
        # Wrap remaining plain tokens. enumerate avoids the linear
        # phrase.index lookup, which also picks the wrong slot when the
        # phrase contains duplicate tokens.
        for i, w in enumerate(phrase):
            if not isinstance(w, Word):
                phrase[i] = Word(w)
        return phrase
    except Exception:
        # Best-effort generation: e.g. an adjective with no comparison
        # nouns yields None instead of propagating the error.
        return
def generate_phrase(self, pool):
    """Generate a phrase from the grammar, replacing each part-of-speech
    placeholder with a randomly chosen candidate word.

    Candidates are drawn from the pool (verbs/epithets keyed by the chosen
    noun, emotional word lists) and from per-person conjugations of
    self.atv / self.eva. Returns a list of Word objects, or None when the
    chosen noun has no verbs/epithets or a substitution fails.
    """
    parser = ChartParser(self.grammar)
    gr = parser.grammar()
    phrase = self.produce(gr, gr.start())
    noun = choice(list(pool.nouns))
    try:
        replace_words = {
            'n': [noun],
            'v': [Word(self.conjugate(v.name)) for v in list(pool.verbs[noun])],
            'adj': pool.epithets[noun],
            'atv': [Word(self.conjugate(v, self.person)) for v in self.atv],
            'eva': [Word(self.conjugate(v, self.person)) for v in self.eva],
            'ej': pool.emotional_adjectives,
            'en': pool.emotional_nouns,
            'erb': pool.emotional_adverbs,
            'person': [Word(self.persons[self.person][0])],
            'pron': [Word(self.persons[self.person][1])],
        }
    except Exception:
        # Noun missing from pool.verbs/pool.epithets: no phrase.
        return
    for pos in replace_words:
        while pos in phrase:
            try:
                word = choice(replace_words[pos])
                phrase = self.replace_pos(pos, word, phrase)
            except Exception:
                # Empty candidate list or failed substitution.
                return
    # Wrap remaining plain tokens; enumerate avoids the linear
    # phrase.index lookup, which misfires on duplicate tokens.
    for i, w in enumerate(phrase):
        if not isinstance(w, Word):
            phrase[i] = Word(w)
    return phrase
def generate_phrase(self, pool):
    """Expand the grammar into a token list and fill every placeholder
    tag with a randomly picked candidate.

    Returns the finished phrase as Word objects, or None if candidate
    collection or any substitution fails.
    """
    chart = ChartParser(self.grammar)
    grammar_obj = chart.grammar()
    phrase = self.produce(grammar_obj, grammar_obj.start())
    subject = choice(list(pool.nouns))
    try:
        verb_forms = [Word(self.conjugate(v.name)) for v in list(pool.verbs[subject])]
        active_forms = [Word(self.conjugate(v, self.person)) for v in self.atv]
        emotive_forms = [Word(self.conjugate(v, self.person)) for v in self.eva]
        substitutions = {
            'n': [subject],
            'v': verb_forms,
            'adj': pool.epithets[subject],
            'atv': active_forms,
            'eva': emotive_forms,
            'ej': pool.emotional_adjectives,
            'en': pool.emotional_nouns,
            'erb': pool.emotional_adverbs,
            'person': [Word(self.persons[self.person][0])],
            'pron': [Word(self.persons[self.person][1])],
        }
    except:
        return
    for tag in substitutions:
        while tag in phrase:
            try:
                picked = choice(substitutions[tag])
                phrase = self.replace_pos(tag, picked, phrase)
            except:
                return
    for token in phrase:
        if not isinstance(token, Word):
            phrase[phrase.index(token)] = Word(token)
    return phrase
def generate_name(G):
    """Randomly expand the grammar string G and return the joined
    terminals as a title-cased name."""
    cfg = CFG.fromstring(G)
    gram = ChartParser(cfg).grammar()
    tokens = produce(gram, gram.start())
    return ''.join(tokens).title()
def generate_entities_question(attr, entities, phase):
    """Build an entities-topic question dict (topicId 3) for the given
    attribute id, with text generated from the entities grammar."""
    entity_name = get_attribute_name(attr, entities)
    gram = ChartParser(generate_entities_grammar(entity_name, phase)).grammar()
    question_text = ' '.join(produce(gram, gram.start()))
    return {
        'text': question_text,
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 3,
    }
def generate_impacts_question(attr, impacts, phase):
    """Build an impacts-topic question dict (topicId 4) for the given
    attribute id, with text generated from the impacts grammar."""
    impact_name = get_attribute_name(attr, impacts)
    gram = ChartParser(generate_impacts_grammar(impact_name, phase)).grammar()
    question_text = ' '.join(produce(gram, gram.start()))
    return {
        'text': question_text,
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 4,
    }
def generate_sources_question(attr, parent_attr, sources, phase):
    """Build a sources-topic question dict (topicId 1) for the given
    attribute id, with text generated from the sources grammar.

    parent_attr may be None; when given, it is resolved to its display
    name before being passed to the grammar builder.
    """
    # The original aliased `id = attr`, shadowing the builtin `id`;
    # `attr` is never reassigned, so it is used directly below.
    attribute = get_attribute_name(attr, sources)
    attribute = analyze_numerus(attribute)
    if parent_attr is not None:
        parent_attr = get_attribute_name(parent_attr, sources)
    parser = ChartParser(
        generate_sources_grammar(attribute, parent_attr, phase))
    gr = parser.grammar()
    question = {
        'text': ' '.join(produce(gr, gr.start())),
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 1
    }
    return question
def generate_phrase(self):
    """Generate a comparison phrase from the blackboard pool.

    Picks an adjective that has comparison nouns, expands the grammar,
    and substitutes the adjective, noun and article. Returns a list of
    Word objects, or None when a substitution fails.
    """
    # Only adjectives with at least one comparison noun are eligible.
    adj = choice([a for a in self.blackboard.pool.comparisons if len(self.blackboard.pool.comparisons[a]) > 0])
    parser = ChartParser(self.grammar)
    gr = parser.grammar()
    phrase = self.produce(gr, gr.start())
    noun = choice(list(self.blackboard.pool.comparisons[adj]))
    # NOTE(review): this mutates the shared pool object in place -- the
    # noun stays singularized for all later users of the pool; confirm
    # that is intended.
    noun.name = en.singularize(noun.name)
    # First token of en.referenced(...) is the article ("a"/"an"/"the").
    article = en.referenced(noun.name).split(" ")[0]
    replace_words = {'adj': adj, 'n': noun, 'det': article}
    for pos in replace_words:
        while pos in phrase:
            try:
                phrase = self.replace_pos(
                    pos, replace_words[pos], phrase)
            except:
                # Any substitution failure aborts and yields None.
                return
    # Wrap remaining plain tokens as Word objects.
    for w in phrase:
        if not isinstance(w, Word):
            phrase[phrase.index(w)] = Word(w)
    return phrase
def generate_phrase(self, pool):
    """Generate a question phrase ("...?") from the grammar, filling in
    a noun, one of its epithets, and a conjugated form of "be".

    Returns a list of Word objects, or None when the noun has no
    epithets or a substitution fails.
    """
    noun = random.choice(list(pool.nouns))
    parser = ChartParser(self.grammar)
    gr = parser.grammar()
    phrase = self.produce(gr, gr.start())
    phrase.append("?")
    try:
        adj = choice(pool.epithets[noun])
    except (KeyError, IndexError):
        # Noun absent from epithets, or its epithet list is empty.
        return
    replace_words = {'adj': adj, 'n': noun, 'be': self.conjugate("be")}
    for pos in replace_words:
        while pos in phrase:
            try:
                phrase = self.replace_pos(pos, replace_words[pos], phrase)
            except Exception:
                return
    # Wrap remaining plain tokens; enumerate avoids the linear
    # phrase.index lookup, which misfires on duplicate tokens.
    for i, w in enumerate(phrase):
        if not isinstance(w, Word):
            phrase[i] = Word(w)
    return phrase
def generate_phrase(self, pool):
    """Generate a phrase from the grammar, filling in a noun, one of its
    epithets, a person-conjugated "be", and the person word.

    Returns a list of Word objects, or None when the noun has no
    epithets or a substitution fails.
    """
    parser = ChartParser(self.grammar)
    gr = parser.grammar()
    phrase = self.produce(gr, gr.start())
    noun = random.choice(list(pool.nouns))
    try:
        # Guarded for consistency with the sibling generators: a noun
        # without epithets yields None instead of raising KeyError.
        adj = choice(pool.epithets[noun])
    except (KeyError, IndexError):
        return
    replace_words = {
        "adj": adj,
        "n": noun,
        "be": self.conjugate("be", self.person),
        "person": self.persons[self.person][0],
    }
    for pos in replace_words:
        while pos in phrase:
            try:
                phrase = self.replace_pos(pos, replace_words[pos], phrase)
            except Exception:
                return
    # Wrap remaining plain tokens; enumerate avoids the linear
    # phrase.index lookup, which misfires on duplicate tokens.
    for i, w in enumerate(phrase):
        if not isinstance(w, Word):
            phrase[i] = Word(w)
    return phrase
def generate_phrase(self, pool):
    """Produce a question phrase: expand the grammar, append "?", then
    substitute a noun, one of its epithets, and a conjugated "be".

    Returns Word objects in a list, or None on any failure.
    """
    subject = random.choice(list(pool.nouns))
    chart = ChartParser(self.grammar)
    gram = chart.grammar()
    phrase = self.produce(gram, gram.start())
    phrase.append("?")
    try:
        epithet = choice(pool.epithets[subject])
    except:
        return
    substitutions = {'adj': epithet, 'n': subject, 'be': self.conjugate("be")}
    for tag in substitutions:
        while tag in phrase:
            try:
                phrase = self.replace_pos(tag, substitutions[tag], phrase)
            except:
                return
    for token in phrase:
        if not isinstance(token, Word):
            phrase[phrase.index(token)] = Word(token)
    return phrase
S -> LImports LRules LImports -> Import LImports | Import -> '@import' '"string"' ';' LRules -> Rule LRules | Rule -> Selectors '{' LDeclaretions '}' LDeclaretions -> Declaration ';' MoreDeclerations MoreDeclerations -> LDeclaretions | Selectors -> SimpleSelector MoreSelectors MoreSelectors -> Selectors | SimpleSelector -> Astrisk SelectorModifier Astrisk -> '*' | SelectorModifier -> '.' 'name' | ':' 'name' | '[' 'name' '=' Term ']' | '#hashid' | 'name' Declaration -> 'name' ':' LTerms Important Important -> '!ImPoRtAnT' | LTerms -> Term MoreTerms MoreTerms -> LTerms | Term -> '1337' | '15%' | '"string"' | 'name' | '#hashid' """) parser = ChartParser(grammar) gr = parser.grammar() test_name = "generated" with open(test_name + '.in', 'w+') as writer: writer.write(' '.join(produce(gr, gr.start()))) with open(test_name + '.out', 'w+') as writer: writer.write("\n".join(map(str, rules))) writer.write("\nSuccess\n")
else: words.extend(produce(grammar, sym, minlen)) return words grammar = parse_cfg(''' F -> N1 '(' P ')' | N2 '(' P ',' P ')' N1 -> 'half' N2 -> 'sum' P -> 'a' | 'b' | F ''') ''' S -> NP VP PP -> P NP NP -> Det N | Det N PP | 'I' VP -> V NP | VP PP V -> 'shot' | 'killed' | 'wounded' Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' | 'cat' | 'dog' P -> 'in' | 'outside' ''' parser = ChartParser(grammar) gr = parser.grammar() print ' '.join(produce(gr, gr.start(),3))
def make_sentence(corpus, term_rules, *args, **kwargs):
    '''
    Generate sentences with random structure and word choice using a
    context-free grammar. The start point is taken from the sentence itself.

    Parameters
    ----------
    corpus : str
        A string containing the full, cleaned corpus.
    term_rules : str
        A string containing all the terminal rules for the corpus.
    maxdepth : int
        The maximum allowed recursion depth before throwing a ValueError.
    fixed_grammar : bool
        Turn off the random sentence selection and use a fixed grammar
        instead.
    sample_sentence : str
        When fixed_grammar is turned on, this is the sentence that will be
        parsed. This can be finicky with grammars containing specially
        punctuated constructions like quotations or positions.
    args[0] : dict
        Optional: a dictionary of kgrams and their subsequent words. If this
        variable exists then cfgen will use this to pick the next words with
        conditional weighting. (The presence of this argument turns on
        Markov text generation features.)
    '''
    # Any extra positional argument switches on Markov-weighted generation.
    markov_flag = bool(args)
    if markov_flag:
        kgram_dict = args[0]
    fixed_grammar = kwargs.pop('fixed_grammar', False)
    sample_sentence = kwargs.pop('sample_sentence', '')
    maxdepth = kwargs.pop('maxdepth', 25)
    if fixed_grammar and sample_sentence == '':
        warnings.warn('When using fixed_grammar, user should specify ' \
            'the keyword argument "sample_sentence." Using a default simple sentence.')
        sample_sentence = 'The cow jumped over the moon.'
    # The sentence tokenizer is loop-invariant: load it once instead of
    # reloading the pickle on each of up to 30 retry iterations.
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    flag = False
    attempts = 0
    while not flag and attempts < 30:
        if has_parser and not fixed_grammar:
            rsent = choice(tokenizer.tokenize(corpus))
        elif fixed_grammar:
            rsent = sample_sentence
        else:
            # No parser available: fall back to a known-good simple
            # sentence. (The original's two identical fallback branches
            # are merged here.)
            print("Usage library being built")
            rsent = "The dog walked up the stairs slowly."
        parsed_syntax = parse_sentence(rsent)
        cfg_str = term_rules + parsed_syntax
        try:
            # The start symbol is the LHS of the first production.
            startpt = parsed_syntax[:parsed_syntax.find(' ->')]
            startpt = nltk.grammar.Nonterminal(startpt)
            grammar = CFG.fromstring(cfg_str)
            parser = ChartParser(grammar)
            gr = parser.grammar()
            if markov_flag:
                out_txt = (' '.join(
                    produce_kgram(gr, startpt, kgram_dict,
                                  maxdepth=maxdepth, sent=[])))
            else:
                out_txt = (' '.join(produce(gr, startpt, maxdepth=maxdepth)))
            flag = True
        except ValueError:
            warnings.warn(
                'Badly formed sentence encountered, resampling the corpus.')
        attempts += 1
    # NOTE(review): if all 30 attempts fail, out_txt is unbound and the
    # loop below raises NameError -- confirm whether a clearer error is
    # wanted here.
    # Re-tag special characters that were swapped out during preprocessing.
    swappairs = zip(replacements, to_replace)
    for member in swappairs:
        out_txt = out_txt.replace(member[0], member[1])
    return out_txt
def make_sentence(self, do_markov=True, **kwargs):
    '''
    Generate sentences with random structure and word choice using a
    context-free grammar. The start point is taken from the sentence itself.

    Parameters
    ----------
    do_markov : bool
        Toggle the Markov word selection on or off (only effective when
        self.order > 0).
    maxdepth : int
        The maximum allowed recursion depth before throwing a ValueError.
    fixed_grammar : bool
        Turn off the random sentence selection and use a fixed grammar
        instead.
    sample_sentence : str
        When fixed_grammar is turned on, this is the sentence that will be
        parsed. This can be finicky with grammars containing specially
        punctuated constructions like quotations or positions.
    '''
    corpus = self.corpus
    term_rules = self.term_rules
    # Markov selection needs both a trained order and the caller's opt-in.
    markov_flag = (self.order > 0) and do_markov
    fixed_grammar = kwargs.pop('fixed_grammar', False)
    sample_sentence = kwargs.pop('sample_sentence', '')
    maxdepth = kwargs.pop('maxdepth', 25)
    if fixed_grammar and sample_sentence == '':
        warnings.warn('When using fixed_grammar, user should specify ' \
            'the keyword argument "sample_sentence." Using a default simple sentence.')
        sample_sentence = 'The cow jumped over the moon.'
    # The sentence tokenizer is loop-invariant: load it once instead of
    # reloading the pickle on each of up to 30 retry iterations.
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    flag = False
    attempts = 0
    while not flag and attempts < 30:
        if has_parser and not fixed_grammar:
            rsent = choice(tokenizer.tokenize(corpus))
        elif fixed_grammar:
            rsent = sample_sentence
        else:
            # No parser available: fall back to a known-good simple
            # sentence. (The original's two identical fallback branches
            # are merged here.)
            warnings.warn(
                "Usage library being built, falling back to simple sentence"
            )
            rsent = "The dog walked up the stairs slowly."
        parsed_syntax = self.parse_sentence(rsent)
        cfg_str = term_rules + parsed_syntax
        try:
            # The start symbol is the LHS of the first production.
            startpt = parsed_syntax[:parsed_syntax.find(' ->')]
            startpt = nltk.grammar.Nonterminal(startpt)
            grammar = CFG.fromstring(cfg_str)
            parser = ChartParser(grammar)
            gr = parser.grammar()
            if markov_flag:
                out_txt = (' '.join(
                    self.produce_kgram(gr, startpt,
                                       maxdepth=maxdepth, sent=[])))
            else:
                out_txt = (' '.join(
                    self.produce(gr, startpt, maxdepth=maxdepth)))
            flag = True
        except ValueError:
            warnings.warn(
                'Badly formed sentence encountered, resampling the corpus.'
            )
        attempts += 1
    # NOTE(review): if all 30 attempts fail, out_txt is unbound and the
    # loop below raises NameError -- confirm whether a clearer error is
    # wanted here.
    # Re-tag special characters that were swapped out during preprocessing.
    swappairs = zip(replacements, to_replace)
    for member in swappairs:
        out_txt = out_txt.replace(member[0], member[1])
    return out_txt