def Tweet_content1():
    grammar = CFG.fromstring(demo_grammar)
    sentence = None
    # n=4 limits output to the first 4 generated sentences (it is not a word or depth limit)
    for sentence in generate(grammar, n=4):
        print(' '.join(sentence))
    return sentence
def generate_sentences(self, remove_duplicates=True, is_test=False):
    """
    Generates strings from self.grammar. Duplicates may optionally be removed.

    :type remove_duplicates: bool
    :param remove_duplicates: If True, duplicates will be removed
    :type is_test: bool
    :param is_test: If True, use test parameters to generate strings.
        Otherwise, use train parameters.
    :rtype: list
    :return: A list of strings generated by self.grammar
    """
    params = self.params
    if is_test:
        params = params.test
    generator = generate(self.grammar, depth=params.sample_depth,
                         n=params.sentence_count)
    if remove_duplicates:
        return [list(y) for y in set(tuple(x) for x in generator)]
    else:
        return list(generator)
def rand_sentences(n=10, depth=6, wpt=0.25):
    # grammar = CFG.fromstring(open('assets/text/grammar.txt', 'r').read())
    grammar = CFG.fromstring(rand_vocabulary(wpt))
    sentences = list(generate(grammar, n=n * 20, depth=depth))
    return [
        ' '.join(i)
        for i in random.sample(sentences, min(n, len(sentences)))
    ]
def generate_positive_examples(G, N=None):
    if N is None:
        N = random.randint(3, 10)
    T = []
    nltk_grammar = convert2_nltk_CFG(G)
    # If the grammar has no terminating productions, generation never ends.
    # No such grammar should really be produced in the first place, but
    # limiting the depth is enough to stop it; strictly speaking the
    # truncated examples are then not in G.
    # A sentence is originally a list of terminal symbols; change it to a
    # list of tokens. Generation happens in order, and depth caps the
    # maximum sentence size.
    # Note: d is computed here, but the generate() call below uses a fixed depth of 15.
    d = min(np.log2(len(G[0].union(G[1])) ** 2), 8)
    for sentence in generate(nltk_grammar, n=50 * N, depth=15):
        tokens = list(map(lambda x: (x,), sentence))
        if check(nltk_grammar, tokens, nltk=True):
            T.append(tokens)
    # randomize the order of the strings
    random.shuffle(T)
    TN = []
    Nm = min(len(T), N)
    for i in range(Nm):
        TN.append(T[i])
    return TN
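# A minimal sketch of the kind of membership test a helper like `check` above
# might perform (the real `check` is not shown here; this version assumes a
# plain nltk ChartParser over the same CFG and unwrapped token lists).
from nltk import CFG, ChartParser
from nltk.parse.generate import generate

toy = CFG.fromstring("""
S -> 'a' S 'b' | 'a' 'b'
""")
parser = ChartParser(toy)

for sent in generate(toy, n=5, depth=6):
    in_language = bool(list(parser.parse(sent)))
    print(' '.join(sent), in_language)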
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
    S -> W TR SUB V '?' | WA TR SUB V DO '?'
    W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
    WA -> 'when' | 'where' | 'why' | 'how'
    TR -> 'have'
    SUB -> PRO
    PRO -> 'they' | 'you'
    V -> '%s'
    DO -> 'the %s'
    """ % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for sentence in generate(grammar1, n=999):
            # generate() yields a list of tokens; join before string tests
            joined = ' '.join(sentence)
            if joined.find('who') == 0:
                multiplier = 1
            if joined.find('what') == 0:
                multiplier = 1
            if joined.find('when') == 0:
                multiplier = 2
            if joined.find('where') == 0:
                multiplier = 2
            if joined.find('why') == 0:
                multiplier = 4
            if joined.find('how') == 0:
                multiplier = 4
            writer.writerow((joined, multiplier * count))
def generate_sentence(self, depth=9, num=30000):
    if num > 30000:
        num = 30000
    gen_num = 0
    done = False
    sentences_list = list()
    for dep in range(1, depth):
        sentences = generate(self.grammar, depth=dep)
        for s in sentences:
            sentences_list.append(' '.join(s) + '\n')
            gen_num += 1
            if gen_num > num:
                done = True
                break
        if done:
            break
    # sentences = generate(self.grammar, depth=depth, n=4)
    # for s in sentences:
    #     # file.write(' '.join(s) + '\n')
    #     sentences_list.append(' '.join(s) + '\n')
    # sentences_list = sentences_list[0:num]
    random.shuffle(sentences_list)
    with open(self.origin_file, 'w') as file:
        for s in sentences_list:
            file.write(s)
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
        S -> WA TR SUB V DO '?' | W TR SUB V '?'
        W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
        WA -> 'when' | 'where' | 'why' | 'how'
        TR -> 'are' | 'were'
        SUB -> 'they' | 'you'
        V -> '%s'
        DO -> 'the %s'
        """ % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            for sentence in generate(grammar1, n=999):
                # sentence becomes a joined string from here on
                sentence = ' '.join(sentence)
                if sentence.find('who') == 0:
                    multiplier = 1
                if sentence.find('what') == 0:
                    multiplier = 1
                if sentence.find('when') == 0:
                    multiplier = 2
                if sentence.find('where') == 0:
                    multiplier = 2
                if sentence.find('why') == 0:
                    multiplier = 4
                if sentence.find('how') == 0:
                    multiplier = 4
                writer.writerow((sentence, multiplier * count))
def generate_from_grammar(self, n, depth):
    grammar = CFG.fromstring(self.gramma)
    print("Generating for n " + n + " and depth " + depth)
    for track in generate(grammar, n=int(n), depth=int(depth)):
        self.track_array.append(' '.join(track))
        # productions
        numbers = " ".join(track)
        self.productions.append(numbers)
def get_n_introductions(number):
    r = random.randint(0, 1)
    if r == 1:
        all_pos_sentences = list(generate(pos_grammar))
        pos_number = len(all_pos_sentences)
        print(pos_number)
        # valid indices run from 0 to pos_number - 1
        pos_sentence = all_pos_sentences[random.randint(0, pos_number - 1)]
        return "Our tactics guy, and " + ' '.join(
            pos_sentence) + ", Nathan A Clark. Hello, Nathan."
    else:
        all_noun_sentences = list(generate(noun_grammar))
        noun_number = len(all_noun_sentences)
        print(noun_number)
        noun_sentence = all_noun_sentences[random.randint(0, noun_number - 1)]
        return "Our tactics guy, and " + ' '.join(
            noun_sentence) + ", Nathan A Clark. Hello, Nathan."
def __init__(self, grammar, depth=5):
    """
    Initialize from a CFG.

    :type grammar: CFG
    :param grammar: A CFG generating the text.
    """
    self._iterator = generate(grammar, depth=depth)
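# Hypothetical usage sketch for the iterator set up in __init__ above (the demo
# grammar and the way the surrounding class exposes sentences are assumptions,
# not part of the original code): sentences are pulled lazily with next().
from nltk import CFG
from nltk.parse.generate import generate

demo = CFG.fromstring("""
S -> NP VP
NP -> 'the' N
VP -> V NP
N -> 'dog' | 'park'
V -> 'saw'
""")

iterator = generate(demo, depth=5)   # same call as in __init__
print(' '.join(next(iterator)))      # first sentence only; the rest stay unevaluated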
def banjoify(rules, song):
    arrangement = []
    for pitch, duration in parse_abc(song):
        grammar = CFG.fromstring(rules.format(pitch=pitch))
        options = list(generate(grammar, start=Nonterminal(duration)))
        phrase = random.choice(options)
        arrangement.append(''.join(phrase))
    return ' '.join(arrangement)
def generate_dataset(grammar, correct, incorrect): """ Generate data with correct and incorrect number-verb agreement. Args: grammar (str): NLTK feature grammar correct (dict): for each number condition (key) a start symbol rule (value) to create sentences with noun-verb agreement incorrect (dict): for each number condition (key) a start symbol rule (value) to create sentences with incorrect verb number Returns: data_correct (list): tuples of (sentence, number_condition) for all correct sentences data_incorrect (list): tuples of (sentence, number_condition) for all sentences with number-verb disagreement """ n_conditions = len(list(correct.keys())[0].split("_")) # Tasks that only have one noun of which we are tracking the number # Examples: simple, adv, qnty_simple, namepp if n_conditions == 1: grammar_correct, _ = get_grammar(grammar, correct["sg"]) # Tasks that have two nouns of which we are tracking the number # Examples: nounpp elif n_conditions == 2: grammar_correct, _ = get_grammar(grammar, correct["sg_sg"]) elif n_conditions == 3: grammar_correct, _ = get_grammar(grammar, correct["sg_sg_sg"]) # Not tracking more than 3 nouns else: sys.exit( "Number of conditions is incorrect. Please check the template.") correct_parsers = defaultdict() incorrect_parsers = defaultdict() data_correct, data_incorrect = [], [] # 'corect' and 'incorrect' are dictionaries containing the same keys # Get the parsers for both the correct sentences and the incorrect # sentences, where the verb number does not match the noun number for corr_key, incorr_key in zip(correct, incorrect): _, correct_parsers[corr_key] = get_grammar(grammar, correct[corr_key]) _, incorrect_parsers[incorr_key] = get_grammar(grammar, incorrect[incorr_key]) # Generate n sentences and classify as either correct or incorrect for sent in tqdm(list(generate(grammar_correct, n=1000000))): for key in correct_parsers: # If a parser for correct sentence can parse the current sentence, # the sentence is correct if list(correct_parsers[key].parse(sent)): data_correct.append((" ".join(sent), key)) break elif list(incorrect_parsers[key].parse(sent)): data_incorrect.append((" ".join(sent), key)) break return data_correct, data_incorrect
def generate_from_grammar(G, depth=50, n=999):
    C = ""  # corpus
    # all possible sentences
    print("\n")
    for i, sent in enumerate(generate.generate(G, depth=depth, n=n), 1):
        s = ' '.join(sent)
        C += s + '. '
        print('%3d. %s%s' % (i, s, '.'))
    return C
def generate_initiative(nouns, adjs, verbs):
    grammar = init_grammar(nouns, adjs, verbs)
    # print(grammar)
    # for sentence in generate(grammar, depth=1000):
    #     print(' '.join(sentence))
    results = generate(grammar)
    return results
def generate_text(grammar, N):
    from nltk.grammar import CFG
    import nltk.parse.generate as gen
    print('Generating the first %d sentences for demo grammar:' % (N,))
    print(grammar)
    grammar = CFG.fromstring(grammar)
    grm_list = gen.generate(grammar, n=N)
    for n, sent in enumerate(grm_list):
        print('%3d. %s' % (n, ' '.join(sent)))
def generate_sent(n: int = 1) -> list:
    """
    Generate Thai sentences.

    :param int n: number of sentences
    :return: list of sentences
    :rtype: list
    """
    global _thaigrammar
    return [' '.join(i) for i in generate(_thaigrammar, n=n)]
def gen_sql_stmt_from_grammar(self, start_, num_stmts=None,
                              table_name="table_name",
                              columns_name="columns_names"):
    grammar = CFG.fromstring(
        self.get_sql_select_stml_grammar(table_name, columns_name,
                                         COMMON_VALUES))
    sql_select_stmts = []
    for stmt in generate(grammar, start=Nonterminal(start_), n=num_stmts):
        sql_select_stmts.append(''.join(stmt))
    return sql_select_stmts
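# Minimal sketch (toy grammar only; the real SQL grammar comes from
# get_sql_select_stml_grammar above): start=Nonterminal(...) makes generate()
# expand from a chosen symbol instead of the grammar's default start symbol.
from nltk import CFG
from nltk.grammar import Nonterminal
from nltk.parse.generate import generate

toy = CFG.fromstring("""
STMT -> SELECT | COUNT
SELECT -> 'SELECT' '*' 'FROM' 't'
COUNT -> 'SELECT' 'COUNT(*)' 'FROM' 't'
""")

for stmt in generate(toy, start=Nonterminal('COUNT'), n=5):
    print(' '.join(stmt))   # only expansions of COUNT are produced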
def generate_messages(self):
    """
    Generates messages for a synthetic structured language according to a
    simple grammar, not randomly.

    Yields
    ------
    message : list
        A list with each element a word (str) in the message.
    """
    for message in generate(self.grammar):
        yield message
def main(args):
    grammar_string = DEMO_GRAMMAR
    if args.input_file_path:
        with open(args.input_file_path, 'r') as f:
            grammar_string = f.read()
    grammar = CFG.fromstring(grammar_string)
    for sentence in generate(grammar, depth=args.depth):
        print(''.join(sentence))
    return 0
def grammar_get(
        base_grammar_fname: util.type.TFile,
        domain_grammar_fnames: T.List[util.type.TFile],
        sents_per_domain=100) -> TNameToSents:
    with open(base_grammar_fname, 'r') as f:
        base = f.read()
    sents = {}
    for domain_fname in domain_grammar_fnames:
        with open(domain_fname, 'r') as f:
            productions = f.read()
        grammar = CFG.fromstring(base + productions)
        sents[domain_fname] = list(generate(grammar, n=sents_per_domain))
    return sents
def respondQuestion(sentence, keyWord, POS):
    if "Tell me" not in sentence:
        grammar = ""
        if POS == "NNPS" or POS == "NNS":
            grammar = CFG.fromstring("""
            S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
            H-NP1 -> 'How'
            Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
            Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
            NP -> Pronoun | Proper-Noun | Noun
            Pronoun -> 'they' | 'those'
            Proper-Noun -> '[]'
            Noun -> 'the <>'
            VP -> Verb NP
            Verb -> 'are'
            """)
        elif POS == "NN" or POS == "NNP":
            grammar = CFG.fromstring("""
            S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
            H-NP1 -> 'How'
            Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
            Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
            NP -> Pronoun | Proper-Noun | Noun
            Pronoun -> 'it' | 'that'
            Proper-Noun -> '[]'
            Noun -> 'the <>'
            VP -> Verb NP
            Verb -> 'is'
            """)
        rand_sent_list = []
        response = ""
        for sentence in generate(grammar):
            rand_sent_list.append(' '.join(sentence))
        while True:
            num = randint(0, len(rand_sent_list) - 1)
            response = rand_sent_list[num]
            if "<>" in response and (POS == "NNS" or POS == "NN"):
                index = response.index("<>")
                response = response[:index] + keyWord + response[index + 2:]
                break
            if "[]" in response and (POS == "NNPS" or POS == "NNP"):
                index = response.index("[]")
                response = response[:index] + keyWord + response[index + 2:]
                break
            if "<>" not in response and "[]" not in response:
                break
        return response
    else:
        knowledgeRep(sentence)
def generate_pairs(depth, cfg): ''' num_pairs: Integer denoting the number of translation pairs depth: integer for thedepth of the parse tree in the CFG cfg: chosen grammar, 1, 2 or 3 ''' if (cfg == 1): grammar = CFG.fromstring(""" S -> Y Y -> a Y b | a Y | a | a -> '(' ')' b -> '{' '}' """) elif cfg == 2: grammar = CFG.fromstring(""" S -> X | Y | X Y X -> a Y -> b a -> '(' a ')' | b -> '{' b '}' | """) elif cfg == 3: grammar = CFG.fromstring(""" S -> X X -> a | b a -> '(' a ')' | b -> '{' b '}' | '{' a '}' """) trg = list(generate(grammar, depth=depth)) trg_list = [] for sentence in trg: k = ''.join(sentence) trg_list.append(k) src_list = trg2src(trg) if cfg == 1: A = list((s + 'A ' for s in src_list)) elif cfg == 2: A = list((s + 'B ' for s in src_list)) elif cfg == 3: A = list((s + 'C ' for s in src_list)) else: None B = list((s for s in trg_list)) df = pd.concat([pd.Series(A), pd.Series(B)], axis=1) pairs = (df.iloc[:, 0] + df.iloc[:, 1]).values.tolist() return pairs
def generateRawTemplates(depth):
    gram = CFG.fromstring(grammarstring)
    rawTemplates = generate(gram, depth=depth)
    templatefiles = []
    for index, state in enumerate(rawTemplates):
        filename = os.path.join("./templates", "template" + str(index))
        with open(filename, 'w') as templatefile:
            templatefile.write(' '.join(state))
        templatefiles.append(filename)
    print(str(len(templatefiles)) + " template files generated")
    return templatefiles
def generate_blazons(grammarfile, n, depth=None):
    bs = []
    with open(grammarfile) as g:
        raw_cfg = g.read()
    parser_grammar = CFG.fromstring(raw_cfg)
    for blazon in generate(parser_grammar, n=n, depth=depth):
        bwords = blazon
        field = bwords[0]
        z = ((isColour(field) and not any(map(isColour, bwords[1:]))) or
             (isMetal(field) and not any(map(isMetal, bwords[1:])))) and (
                 field not in bwords[1:])
        if z:
            bs.append(' '.join(blazon))
    return bs
def generate_sentence(subject, predicate, object, useTemplate=False):
    if useTemplate == False:
        predicate = literal_tuner(predicate)
        rand = random.randint(0, 1)
        object = literal_tuner(object)
        grammar = get_grammar(subject, object, predicate)
        # very simplified randomized string generation, because we currently
        # only have two valid compositions
        for sentence in generate(grammar, n=10):
            if rand < 1:
                return ' '.join(sentence)
            else:
                rand = rand - 1
                continue
def generate(self, tree_depth, num_expressions):
    """Generates expression strings from context-free grammar.

    Args:
        tree_depth: Integer, depth of the grammar parsing tree.
        num_expressions: Integer, maximum number of expressions to generate.

    Yields:
        List of token strings for an expression string.
    """
    for token_list in generate.generate(self._cfg, depth=tree_depth,
                                        n=num_expressions):
        yield token_list
def main():
    """Skeleton towards a data generation process.

    An actual data generation pipeline would:
    1. Generate all basic sentences and throw them in train.
    2. Generate all complex sentences, and divide between train/test.
    """
    print("Basic grammar...")
    basic_grammar = load_grammar("grammars/basic_sents.fcfg")
    sentences = tqdm(generate(basic_grammar))
    expressions = semantic_parse(sentences, basic_grammar)
    for expression in expressions:
        value = evaluate(expression, model_dict)
        print(str(expression), ":", value)

    print("Complex grammar...")
    complex_grammar = load_grammar("grammars/complex_sents.fcfg")
    sentences = tqdm(generate(complex_grammar, n=200, depth=5))
    expressions = semantic_parse(sentences, complex_grammar)
    for expression in expressions:
        value = evaluate(expression, model_dict)
        if value is not None:
            print(str(expression), ":", value)
def main_phrases(name, othername=None):
    phrase_grammar = f"""
    S -> '{name}!'
    S -> 'Go, {name}!'
    S -> 'Show them, {name}!'
    S -> 'I chose you, {name}!'
    S -> 'You can do it, {name}!'
    """
    grammar = CFG.fromstring(phrase_grammar)
    phrases = list(generate(grammar))
    shortlist = list(filter(lambda x: (syllables.estimate(x[0]) == 5), phrases))
    return shortlist[0]
def single_phrases(name, types=None, evolutions=None, attacks=None):
    phrase_grammar = f"""
    """
    if attacks is not None:
        for attack in attacks:
            phrase_grammar += f"""
            S -> 'Use {attack}!'
            S -> '{attack}, now!'
            """
    grammar = CFG.fromstring(phrase_grammar)
    return generate(grammar)
def generate_tweet(grammar):
    from nltk.grammar import CFG
    import nltk.parse.generate as gen
    print(grammar)
    grammar = CFG.fromstring(grammar)
    grm_list = gen.generate(grammar, n=SIZE)
    # TODO: what is the maximum size? is there a way to retrieve it?
    from random import randint
    rd = randint(0, SIZE - 1)
    cpt = 0
    for n, sent in enumerate(grm_list):
        if rd == cpt:
            print("Your tweet : ")
            print('%3d. %s' % (n, ' '.join(sent)))
        cpt += 1
def generate_sample_strings(self, remove_duplicates=True):
    """
    Generates all strings from self.grammar up to the depth specified by
    self.depth. Duplicates may optionally be removed.

    :type remove_duplicates: bool
    :param remove_duplicates: If True, duplicates will be removed
    :rtype: list
    :return: A list of strings generated by self.grammar
    """
    generator = generate(self.grammar, depth=self.sample_depth)
    if remove_duplicates:
        return [list(y) for y in set(tuple(x) for x in generator)]
    else:
        return list(generator)
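# Minimal sketch (the tiny ambiguous grammar is an assumption, not one of the
# grammars used above): an ambiguous CFG can yield the same token sequence more
# than once, which the set-of-tuples trick in generate_sample_strings removes.
from nltk import CFG
from nltk.parse.generate import generate

ambiguous = CFG.fromstring("""
S -> A | B
A -> 'x'
B -> 'x'
""")

raw = list(generate(ambiguous, depth=3))
unique = [list(t) for t in set(tuple(s) for s in raw)]
print(len(raw), len(unique))  # 2 1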
def generate(self, topstart="top", n=no_sents):
    if topstart == "top":
        topstart = self.start
    else:
        topstart = self.findstart(topstart)
    if n > 0:
        max_sents = n
    else:
        max_sents = CorpusGenerator.no_sents
    sentences = 0
    for sentence in generate(self.grammar, start=topstart, n=max_sents):
        if max_sents < 1000000000:
            print(' '.join(sentence))
        sentences += 1
    print("Produced sentences: " + str(sentences))
def main(): zen = """ Beautiful is better than ugly. Explicit is better than implicit. Simple is better than complex. Complex is better than complicated. Flat is better than nested. Sparse is better than dense. Readability counts. Special cases aren't special enough to break the rules. Although practicality beats purity. Errors should never pass silently. Unless explicitly silenced. In the face of ambiguity, refuse the temptation to guess. There should be one-- and preferably only one --obvious way to do it. Although that way may not be obvious at first unless you're Dutch. Now is better than never. Although never is often better than *right* now. If the implementation is hard to explain, it's a bad idea. If the implementation is easy to explain, it may be a good idea. Namespaces are one honking great idea -- let's do more of those!""" tagged = nltk.pos_tag(nltk.word_tokenize(zen)) tagged = [(tag, word) for word, tag in tagged] # #tag_word_map = defaultdict(list) #[(tag, word) for word, tag in tagged] tags = set([tag for tag, _ in tagged]) tag_word_map = {tag: {word for key, word in tagged if key == tag} for tag in tags} gram_head = """ S -> NNP VBZ JJR IN RB """ cats = ['NNP', 'VBZ', 'JJR', 'IN', 'RB'] gram = [cat + ' -> ' + '|'.join([repr(x) for x in tag_word_map[cat]]) for cat in cats] grammar = gram_head + '\n'.join(gram) grammar = nltk.CFG.fromstring(grammar) poem = [] for sentence2 in generate(grammar, depth=5): poem.append(' '.join(sentence2)) out = "\n".join(choice(poem, size=10)) print(out)
def onMessage(self, author_id, message_object, thread_id, thread_type, **kwargs):
    self.markAsDelivered(thread_id, message_object.uid)
    self.markAsRead(thread_id)
    log.info("{} from {} in {}".format(message_object, thread_id, thread_type.name))
    log.info(message_object.text)
    # If you're not the author, echo
    if author_id != self.uid:
        grammar = CFG.fromstring(demo_grammar)
        # generate() returns an iterator of token lists; send one joined sentence
        sentence = ' '.join(next(generate(grammar, depth=14)))
        self.send(Message(text=sentence),
                  thread_id=thread_id, thread_type=thread_type)
        self.send(Message(text='Chao'),
                  thread_id=thread_id, thread_type=thread_type)
def generate_sentences(args):
    in_file = open(args[1])
    out_file = open(args[2], 'w')
    gram = in_file.read()
    grammar = CFG.fromstring(gram)
    print(grammar)
    sentences = ""
    for s in generate(grammar, depth=int(args[0])):
        sentences += ''.join(s) + '\n'
    out_file.writelines(sentences)
    in_file.close()
    out_file.close()
g3 = """ S -> S1[G=?n] S1[G='money'] -> 'How many notes of each denomination person has?' S1[G='shape'] -> 'What are its length and breadth?' S1[G='int'] -> 'What are the two numbers?' S1[G='age'] -> 'What are their present ages?' S1[G='class'] -> 'What is the total strength?' """ first=[] sec=[] third=[] grammar1 = nltk.grammar.FeatureGrammar.fromstring("""% start S"""+"\n"+gramstring) parser1 = nltk.FeatureChartParser(grammar1) for sentence1 in generate(grammar1): if(parser1.parse_one(sentence1)): string1=' '.join(sentence1) first.append(string1) #print(l) grammar2 = nltk.grammar.FeatureGrammar.fromstring("""% start S"""+"\n"+g2) parser2 = nltk.FeatureChartParser(grammar2) for sentence2 in generate(grammar2): if(parser2.parse_one(sentence2)): string2=' '.join(sentence2) if string2 not in sec: sec.append(string2) else: pass
import nltk from nltk.parse import generate from nltk.grammar import Nonterminal cfg = nltk.CFG.fromstring(""" root -> who_player has the most runs who_player -> who who_player -> which player who_player -> which team player who -> 'who' which -> 'which' player -> 'player' team -> 'indian' | 'australian' | 'england' | 'sri' 'lankan' has -> 'has' the -> 'the' this -> 'this' most -> 'most' runs -> 'runs' """) print(list((n,sent) for n, sent in enumerate(generate.generate(cfg, n=100, start=Nonterminal('root')), 1))) result1 = nltk.ChartParser(cfg).parse('which england player has the most runs'.split()) result2 = nltk.ChartParser(cfg).parse(['which', 'sri', 'lankan', 'player', 'has', 'the', 'most', 'runs']) print(list(result1)) print(list(result2))
def output(request): # Validation of form if request.method == "POST": # Validation of request if 'inputURL' in request.POST: # Validation of image url imageURL = request.POST.get('inputURL') image_output = imageURL indexOfDot = imageURL.rfind(".") if indexOfDot == -1: return fail(request) # not an image URL indexOfDot += 1 extension = imageURL[indexOfDot:] if extension != 'jpg' and extension != 'jpeg' and extension != 'png': return fail(request) # not a valid image (jpg, jpeg, png) client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6' secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN' clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set. try: result = clarifai_api.tag_image_urls(imageURL) except ApiError: #return fail(request) messages.add_message(request, messages.INFO, "ApiError") return HttpResponseRedirect('makestory/fail.html') class_list = result['results'][0]['result']['tag']['classes'] prob_list = result['results'][0]['result']['tag']['probs'] class_str = "" for i in range(0, len(class_list)/2): class_str += class_list[i] + " " # currently just the list of matched words text_output = class_list.__str__() # Parts of speech recognition tokens = nltk.word_tokenize(class_str) # dictionary = PyDictionary() # nouns = [] # verbs = [] # adjectives = [] # otherPos = [] # for word in tokens: # #definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4 # #assignment = definition.keys()[0] # Get the part of speech from the dictonary # assignment = "" # # assignment = tuple[1] # if assignment == 'Noun': # nouns.append(word) # elif assignment == 'Verb': # verbs.append(word) # elif assignment == 'Adjective': # adjectives.append(word) # else: # otherPos.append(word) # Create the grammar #P:prepositions, DET:articles, adverbs DET = ["'the'","'a'","'some'"] # P = ["'in'","'at'","'since'","'for'","'to'","'past'","'to'""'by'","'in'","'at'","'on'","'under'","'below'","'over'","'above'","'into'","'from'","'of'","'on'","'at'"] VB = ["'talks'","'does'","'has'","'cries'", "'fights'", "'traps'", "'bakes'", "'fondles'", "'cooks'", "'sees'", "'calls'", "'smells'", "'tastes'", "'hears'"] assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc. 
# pos_tags = [] pos_words = {} pos_words['DET'] = DET #pos_words['P'] = P pos_words['VB'] = VB for tuple in assignments: word = tuple[0] pos = tuple[1] if pos in pos_words: pos_words[pos].append("\'" + word + "\'") else: pos_words[pos] = [] pos_words[pos].append("\'" + word + "\'") # pos_tags.append(pos) #grammar = """ #S -> NP VP #PP -> P NP #NP -> Det N #VP -> V Det N | V Det N PP #""" grammar = """ S -> NP VP NP -> Det N VP -> V Det N """ #Det -> 'DT' # N -> 'NN' # V -> 'VBZ' # P -> 'PP' # adverb is RB if 'DET' in pos_words: grammar += 'Det ->' + ' | '.join(pos_words['DET']) + '\n' if 'P' in pos_words: grammar += 'P ->' + ' | '.join(pos_words['P']) + '\n' if 'NN' in pos_words: grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n' #change to VB for nltk if 'VB' in pos_words: grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n' #if 'JJ' in pos_words: # grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n' simple_grammar = CFG.fromstring(grammar) # simple_grammar.start() # simple_grammar.productions() sentences = [] sentence_validity = [] for sentence in generate(simple_grammar, depth=4): sentences.append(' '.join(sentence)) sentence_validity = get_validity(sentences) #get_validity(sentences) # parser = nltk.ChartParser(simple_grammar) # tree = parser.parse(pos_tags) story = "" for i in range(0, 10): tuple = sentence_validity[i] string = tuple[1] start_letter = string[0].upper() story += start_letter story += string[1:] story += ". " return render(request, 'makestory/output.html', { 'imageURL_output': imageURL, 'story_output': story, 'grammar_test_output': simple_grammar, 'sentences_test_output': sentences, } ) else: return fail(request) return fail(request)
def generate_syllables(self):
    ''' every possible syllable for the given phonemes and grammar '''
    # spaces, which are only there for NLTK's sake, are removed
    return [re.sub(' ', '', '/'.join(s)) for s in
            generate(self.grammar, depth=4)]
# Filter each sentence and return the filtered form of the last one.
def eliminate(sentence):
    sents = nltk.sent_tokenize(sentence)
    result = None
    for sent in sents:
        result = filter(sent)  # `filter` here is the project's own helper, not the builtin
    return result


# Here input is the chosen option on the UI.
# Each question is given an ID as per the NCERT book; input is set to that chosen value.
input = 26

# Generate variations of a particular question based on the input and its
# corresponding grammar.
if input == 2:
    g = CFG.fromstring(g1)
    g2 = CFG.fromstring(g2)
    rd_parser = nltk.RecursiveDescentParser(g)
    for sent, sent2 in zip(generate(g2, n=100), generate(g, n=100)):
        newsent1 = ' '.join(sent)
        newsent2 = ' '.join(sent2)
        ans1 = eliminate(newsent1)
        ans2 = eliminate(newsent2)
        if ans1 is None or ans2 is None:
            pass
        else:
            print(ans1)
            print(ans2)
            print("Determine the length and breadth")
            print("\n")
elif input == 4:
    g = CFG.fromstring(g3)
    g2 = CFG.fromstring(g4)
    rd_parser = nltk.RecursiveDescentParser(g)
from contractions import contractions

sent_tokenizer = PunktSentenceTokenizer()

with open("<source of text>", "r") as f:
    text = f.read()

for k, v in contractions.items():
    text = text.replace(k, v)

sents = []
for paragraph in text.split('\n'):
    sents += sent_tokenizer.tokenize(paragraph)

parser = Parser()
productions = []
for sent in sents[:25]:
    try:
        tree = parser.parse(sent)
        productions += tree.productions()
    except:
        pass

S = Nonterminal('S')
grammar = induce_pcfg(S, productions)

for sentence in generate(grammar, depth=5):
    print(" ".join(sentence) + "\n")
def surface_realizer(grammar):
    # returns only the first sentence generated from the grammar
    for sentence in generate(grammar, n=10):
        return ' '.join(sentence)
__author__ = 'Mohammed Shokr <*****@*****.**>'

# Generating sentences from context-free grammars
from nltk.parse.generate import generate, demo_grammar
from nltk import CFG

# An example grammar:
grammar = CFG.fromstring(demo_grammar)
print(grammar)
print("#---------------------------------------------------------------#")

# The first 10 generated sentences:
for sentence in generate(grammar, n=10):
    print(' '.join(sentence))
print("#---------------------------------------------------------------#")
if 'NN' in pos_words: grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n' if 'VB' in pos_words: grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n' if 'JJ' in pos_words: grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n' simple_grammar = CFG.fromstring(grammar) #simple_grammar.start() simple_grammar.productions() sentences = [] for sentence in generate(simple_grammar, n=10): sentences.append(' '.join(sentence)) # parser = nltk.ChartParser(simple_grammar) # tree = parser.parse(pos_tags) caption = 'this is a caption' story = 'this is the story' return render(request, 'makestory/output.html', { 'nouns_output': nouns, 'verbs_output': verbs, 'adjectives_output': adjectives,
def choose_line(some_lines):#5 return a_random.choice(#7 some_lines).lower() #5 ############################################ ############################################ choose = choose_line #5 g = G.fromstring(#7 this_is_the_grammar) #5 ############################################ ############################################ while not len(pentas):#5 for poem in generate(g, #7 start=N('five')): #5 ############################################ ############################################ pentas.append(#5 with_blank_spaces.join(poem))#7 fives = pentas #5 ############################################ ############################################ third = choose(fives) #5 first = choose(fives) #7 def display_the(poem):#5 ############################################
[1. if i == b else 0. for i in xrange(len(code_for))]) # list of codes of symbols to predict to_predict_codes = [onehot(code_for[s]) for s in to_predict] # function to test if a symbol code is in list to predict def in_predict_codes(code): for i in xrange(len(to_predict_codes)): if ((code == to_predict_codes[i]).all()): return True return False # sample_strings = all strings from grammar of depth at most sample_depth sample_strings = list(generate(grammar, depth=sample_depth)) # report #, min length and max length for strings in sample_strings print("number of sample strings = {}".format(len(sample_strings))) sample_lengths = [len(s) for s in sample_strings] print("min length = {}, max length = {}".format(min(sample_lengths), max(sample_lengths))) # sanity check: report one random string from sample_strings print "random sample string = {}".format(random.choice(sample_strings)) ################################# model = VanillaModel(len(code_for), READ_SIZE, len(code_for)) try: model.cuda()
from nltk.parse.generate import generate  # , demo_grammar
from nltk import CFG

demo_grammar = """
S -> NP VP
NP -> Det N
PP -> P NP
VP -> 'slept' | 'saw' NP | 'walked' PP
Det -> 'the' | 'a'
N -> 'man' | 'park' | 'dog'
P -> 'in' | 'with'
"""
grammar = CFG.fromstring(demo_grammar)
print(grammar)

# Join words and generate based off of the grammar - for n sentences
for sentence in generate(grammar, n=12):
    print(' '.join(sentence))

'''
Notes:
Need to symbolize the grammar
Have the machine process the language
Need to integrate with Markov chain - file 'agiliq-markov.py'
'''

for sentence in generate(grammar, depth=4):
    print(' '.join(sentence))
# to save typing dgr = dyck_grammar uagr = unambig_agreement_grammar eegr = exp_eval_grammar # comparisons of table calculations and reported sample sizes # Note: the generate function from nltk uses a notion of # depth that is 1 more than that used above! # NB: the dyck_grammar is NOT unambiguous (S -> S S) dgr_table = make_table(6, dgr) print "dyck_grammar for 4 from count_nonterminal_depth" print count_nonterminal_depth(dgr.start(), 4, dgr_table, dgr) print "nltk generate: number of sentences for dyck grammar at depth = 5" print len(list(generate(dgr, depth=5))) print "The dyck_grammar is ambiguous!" # unambig_agreement_grammar # this agrees with the count for depth = 16 in generate uagr_table = make_table(15, uagr) print "unambig_agreement_grammar for 15 from count_nonterminal_depth" print count_nonterminal_depth(uagr.start(), 15, uagr_table, uagr) # exp_eval_grammar # this agrees with the count for depth = 6 in generate eegr_table = make_table(5, eegr) print "exp_eval_grammar for 5 from count_nonterminal_depth" print count_nonterminal_depth(eegr.start(), 5, eegr_table, eegr) print "number of nltk depth = 7 sentences from dyck_grammar"
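# Minimal sketch (the one-rule grammar is an assumption, not one of the grammars
# above): printing how many strings nltk's generate() yields at each depth makes
# it easy to check the off-by-one depth convention mentioned in the comments
# above against a hand-built table.
from nltk import CFG
from nltk.parse.generate import generate

g = CFG.fromstring("""
S -> 'a' S | 'b'
""")

for d in range(1, 6):
    print(d, len(list(generate(g, depth=d))))   # 0, 1, 2, 3, 4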
def output(request): # Validation of form if request.method == "POST": # Validation of request if 'inputURL' in request.POST: # Validation of image url imageURL = request.POST.get('inputURL') image_output = imageURL indexOfDot = imageURL.rfind(".") if indexOfDot == -1: return fail(request) # not an image URL indexOfDot += 1 extension = imageURL[indexOfDot:] if extension != 'jpg' and extension != 'jpeg' and extension != 'png': return fail(request) # not a valid image (jpg, jpeg, png) client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6' secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN' clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set. return output(request, makes{image_output:'image_output', text_output:'text_output'}) result = clarifai_api.tag_image_urls(imageURL) except ApiError: #return fail(request) messages.add_message(request, messages.INFO, "ApiError") return HttpResponseRedirect('makestory/fail.html') class_list = result['results'][0]['result']['tag']['classes'] prob_list = result['results'][0]['result']['tag']['probs'] class_str = "" for i in range(0, len(class_list)): class_str += class_list[i] + " " # currently just the list of matched words text_output = class_list.__str__() # Parts of speech recognition tokens = nltk.word_tokenize(class_str) dictionary = PyDictionary() nouns = [] verbs = [] adjectives = [] otherPos = [] for word in tokens: definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4 assignment = definition.keys()[0] # Get the part of speech from the dictonary # assignment = tuple[1] if assignment == 'Noun': nouns.append(word) elif assignment == 'Verb': verbs.append(word) elif assignment == 'Adjective': adjectives.append(word) else: otherPos.append(word) # Create the grammar #P:prepositions, DET:articles, adverbs P = ["on","in","at","since","for","ago","before","to","past","to","until","by","in","at","on","under","below","over","above","into","from","of","on","at"] DET = ["the","a","one","some","few","a few","the few","some"] assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc. pos_tags = [] pos_words = {} for tuple in assignments: word = tuple[0] pos = tuple[1] if pos in pos_words: pos_words[pos].append(word) else: pos_words[pos] = [] pos_tags.append(pos) grammar = """ S -> NP VP PP -> P NP NP -> Det N | Det N PP VP -> V NP | VP PP Det -> 'DT' """ # N -> 'NN' # V -> 'VBZ' # P -> 'PP' # adverb is RB if 'NN' in pos_words: grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n' if 'VB' in pos_words: grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n' if 'JJ' in pos_words: grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n' simple_grammar = CFG.fromstring(grammar) #simple_grammar.start() simple_grammar.productions() sentences = [] for sentence in generate(simple_grammar, n=10): sentences.append(' '.join(sentence)) # parser = nltk.ChartParser(simple_grammar) # tree = parser.parse(pos_tags) caption = 'this is a caption' story = 'this is the story' return render(request, 'makestory/output.html', { 'nouns_output': nouns, 'verbs_output': verbs, 'adjectives_output': adjectives, 'otherPos_output': otherPos, 'imageURL_output': imageURL, 'caption_output': caption, 'story_output': story, 'sentences_test_output': sentences, } )
from nltk.parse.generate import generate
from nltk import CFG
from nltk.data import load

for gg in ['grammar_2.cfg']:
    grammar = load('file:' + gg)
    for sentence in generate(grammar, depth=6, n=1000000):
        print(' '.join(sentence))