import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Print a one-line "dinosaur fight" built from two words produced by a
# WordAssembler over the "dinosaurs" corpus.
assembler = WordAssembler(Corpus.load("dinosaurs"))

dinos = []
for _ in range(2):
    dino = assembler.assemble_word()
    # Only a leading A, E, I or O selects "an"; every other first letter
    # (including U) gets "a".  startswith() also copes with an empty word,
    # where indexing dino[0] would raise IndexError.
    article = "an " if dino.startswith(("A", "E", "I", "O")) else "a "
    dinos.append(article + dino)

# Parenthesised single-argument form: valid as a Python 2 print statement
# and as a Python 3 print() call, with identical output.
print("Look! Behind that ridge! It's %s fighting %s!" % tuple(dinos))
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Emit 50 lines, each made of two words assembled from the 'chemicals'
# entry of the "toxic_chemicals" corpus.
c = Corpus.load("toxic_chemicals")
assembler = WordAssembler(c['chemicals'])

for _line in range(50):
    # Two independent draws per output line, formatted as "first second".
    pair = tuple(assembler.assemble_word() for _ in range(2))
    print("%s %s" % pair)
import json
from queneau import WordAssembler
from data import load_json
import textwrap

# Print a one-line "dinosaur fight" built from two words produced by a
# WordAssembler over the contents of "dinosaurs.json".
assembler = WordAssembler(load_json("dinosaurs.json"))

dinos = []
for _ in range(2):
    dino = assembler.assemble_word()
    # Only a leading A, E, I or O selects "an"; every other first letter
    # (including U) gets "a".  startswith() also copes with an empty word,
    # where indexing dino[0] would raise IndexError.
    article = "an " if dino.startswith(("A", "E", "I", "O")) else "a "
    dinos.append(article + dino)

# Parenthesised single-argument form: valid as a Python 2 print statement
# and as a Python 3 print() call, with identical output.
print("Look! Behind that ridge! It's %s fighting %s!" % tuple(dinos))
from queneau import WordAssembler
import nltk
from nltk.tokenize import WordPunctTokenizer
from textblob.tokenizers import word_tokenize

# Build a vocabulary from one poem file and print 1000 words assembled
# from that vocabulary.

# Number of leading entries dropped from the sorted vocabulary.
# NOTE(review): the original comment says this "cuts out the punctuation";
# the count is presumably specific to this particular file - confirm before
# pointing the script at a different text.
PUNCTUATION_ENTRIES = 15

poem = 'data/poems/book01.txt'
# NOTE(review): the file is read in binary mode, so on Python 3 `raw` is a
# bytes object - confirm that word_tokenize accepts that before porting.
with open(poem, 'rb') as f:
    raw = f.read()

# Earlier experiments used WordPunctTokenizer().tokenize(raw) and
# nltk.word_tokenize(raw) / nltk.Text(tokens); textblob's word_tokenize is
# the tokenizer actually in use.
words = [w.lower() for w in word_tokenize(raw)]

# Unique lower-cased tokens in sorted order, minus the leading entries.
vocab = sorted(set(words))[PUNCTUATION_ENTRIES:]

corpus = WordAssembler(vocab)
for _ in range(1000):
    # Parenthesised single-argument form works as a Python 2 print
    # statement and as a Python 3 print() call.
    print(corpus.assemble_word(min_length=5))
import json
from queneau import WordAssembler
import textwrap

# Print two sentences: three words assembled from the common-English word
# list, then three assembled from the full English word list.

# Load each word list inside a `with` block so the file handle is closed
# as soon as the JSON has been parsed instead of being leaked.
with open("data/english_words.common.json") as f:
    common_corpus = WordAssembler(json.load(f))
with open("data/english_words.json") as f:
    full_corpus = WordAssembler(json.load(f))

# Parenthesised single-argument form: valid as a Python 2 print statement
# and as a Python 3 print() call, with identical output.
print('You know "%s", "%s", and "%s".'
      % tuple(common_corpus.assemble_word() for _ in range(3)))
print('But have you heard of "%s", "%s", or "%s"?'
      % tuple(full_corpus.assemble_word() for _ in range(3)))
from queneau import Assembler, WordAssembler
import textwrap

# Print `how_many` entries.  Each entry is a name assembled from the 'name'
# fields of the sources that were picked, followed by the picked sentences
# joined and wrapped to textwrap's default width.

# Close the data file once the assembler has been built instead of leaking
# the handle.
# NOTE(review): assumes Assembler.load reads the whole file before
# returning - confirm against queneau.
with open("data/minor_planets.min.json") as f:
    corpus = Assembler.load(f, tokens_in='citation')

how_many = 100
for i in range(how_many):
    sentences = []
    names = []
    for sentence, source in corpus.assemble("f.l", min_length=3):
        sentences.append(sentence)
        names.append(source['name'])

    # Make a new assembler from the names of the asteroids that were chosen.
    name_assembler = WordAssembler(names)
    name = name_assembler.assemble_word()

    # Parenthesised single-argument print works as a Python 2 statement and
    # as a Python 3 function call.
    print(name)
    for s in textwrap.wrap(" ".join(sentences)):
        print(s)

    # Blank separator line between entries, but not after the last one.
    # print("") emits an empty line on both Python 2 and Python 3.
    if i < how_many - 1:
        print("")
import random
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Print 25 lines, each a randomly chosen phrase from `verbs` followed by a
# word assembled from the "cannabis" corpus.
c = Corpus.load("cannabis")
assembler = WordAssembler(c)

# NOTE(review): "Hit the" appears twice, which makes it twice as likely to
# be picked as any other phrase - kept as-is; confirm whether intentional.
verbs = [
    "Puff on that",
    "Puff the",
    "Smoke that",
    "Smokin on the",
    "Smoke on that",
    "Smoke on this",
    "Hit the",
    "Take a toke of the",
    "Pass the",
    "Hit the",
    "Hit this",
    "Blaze up that",
    "Blaze on that",
    "Fire up that",
    "Light up the",
    "Blaze some",
]

for _ in range(25):
    # random.choice picks uniformly from the list, replacing the manual
    # randint-based index arithmetic.
    print("%s %s" % (random.choice(verbs), assembler.assemble_word()))
from queneau import Assembler, WordAssembler
import textwrap
from corpus import Corpus

# Print `how_many` entries built from the "minor_planets" corpus.  Each
# entry is a name assembled from the 'name' fields of the picked sources,
# followed by the picked sentences joined and wrapped to textwrap's default
# width.  Entries are separated by a blank print() call.
corpus = Assembler.loadlist(Corpus.load("minor_planets"), tokens_in='citation')
how_many = 100

for i in range(how_many):
    # Separator goes before every entry except the first, which yields the
    # same output as printing it after every entry except the last.
    if i > 0:
        print()

    picked = list(corpus.assemble("f.l", min_length=3))
    sentences = [sentence for sentence, _ in picked]
    names = [source['name'] for _, source in picked]

    # Make a new assembler from the names of the asteroids that were chosen.
    name = WordAssembler(names).assemble_word()
    print(name)

    paragraph = " ".join(sentences)
    for wrapped_line in textwrap.wrap(paragraph):
        print(wrapped_line)
import json
from queneau import WordAssembler
import textwrap

# Print two sentences: three words assembled from the common-English word
# list, then three assembled from the full English word list.

# Load each word list inside a `with` block so the file handle is closed
# as soon as the JSON has been parsed instead of being leaked.
with open("data/english_words.common.json") as f:
    common_corpus = WordAssembler(json.load(f))
with open("data/english_words.json") as f:
    full_corpus = WordAssembler(json.load(f))

# Parenthesised single-argument form: valid as a Python 2 print statement
# and as a Python 3 print() call, with identical output.
print('You know "%s", "%s", and "%s".'
      % tuple(common_corpus.assemble_word() for _ in range(3)))
print('But have you heard of "%s", "%s", or "%s"?'
      % tuple(full_corpus.assemble_word() for _ in range(3)))
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Emit 50 "X has been replaced by Y" lines, where X and Y are words
# assembled from the 'codenames' entry of the "nsa_projects" corpus.
c = Corpus.load("nsa_projects")
assembler = WordAssembler(c['codenames'])

for _line in range(50):
    # Two independent draws per line, both requested with the same
    # pattern/length arguments.
    pair = tuple(
        assembler.assemble_word(pattern="011.", length=5) for _ in range(2)
    )
    print("%s has been replaced by %s" % pair)