for i in range(how_many): sentences = [] names = [] genres = [] mechanics = [] for line, source in corpus.assemble("0.l"): if no_punctuation_at_end.search(line): line += "." sentences.append(line) names.append(source['name']) genres.append([genre for id, genre in source.get('boardgamecategory', [])]) mechanics.append([mechanic for id, mechanic in source.get('boardgamemechanic', [])]) # Make assemblers for single- and multi-word names. single_word_assembler = WordAssembler() multi_word_assembler = Assembler() # Create a composite assembler that will choose single- and # multi-word names in appropriate proportion. name_assembler = CompositeAssembler([single_word_assembler, multi_word_assembler]) for name in names: words = whitespace.split(name) if len(words) == 1: single_word_assembler.add(name) else: multi_word_assembler.add(words) assembler, choice = name_assembler.assemble() if assembler == single_word_assembler: separator = '' else:
# Generate two dinosaur names by Queneau assembly and print a mock
# nature-documentary exclamation featuring them.
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

assembler = WordAssembler(Corpus.load("dinosaurs"))

dinos = []
for i in range(2):
    dino = assembler.assemble_word()
    # Prefix the right indefinite article. NOTE(review): 'U' appears to be
    # deliberately absent -- "a Utahraptor", not "an Utahraptor" -- confirm.
    if dino[0] in 'AEIO':
        dino = "an " + dino
    else:
        dino = "a " + dino
    dinos.append(dino)

# print() call (not the py2 statement) for consistency with the
# python-3 scripts elsewhere in this file.
print("Look! Behind that ridge! It's %s fighting %s!" % tuple(dinos))
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Print 50 two-word "toxic chemical" names assembled from the corpus.
chemical_corpus = Corpus.load("toxic_chemicals")
word_maker = WordAssembler(chemical_corpus['chemicals'])

for _ in range(50):
    pair = [word_maker.assemble_word() for _ in range(2)]
    print("%s %s" % (tuple(pair)))
# Same dinosaur-fight generator as the Corpus-based variant, but loading
# the corpus straight from a JSON file via the local data helper.
import json
from queneau import WordAssembler
from data import load_json
import textwrap

assembler = WordAssembler(load_json("dinosaurs.json"))

dinos = []
for i in range(2):
    dino = assembler.assemble_word()
    # NOTE(review): 'U' appears deliberately absent ("a Utahraptor") -- confirm.
    if dino[0] in 'AEIO':
        dino = "an " + dino
    else:
        dino = "a " + dino
    dinos.append(dino)

# print() call (not the py2 statement) for consistency with the
# python-3 scripts elsewhere in this file.
print("Look! Behind that ridge! It's %s fighting %s!" % tuple(dinos))
# Build a word assembler from the vocabulary of a poem and print 1000
# invented words in the poem's style.
from queneau import WordAssembler
import nltk
from nltk.tokenize import WordPunctTokenizer
from textblob.tokenizers import word_tokenize

poem = 'data/poems/book01.txt'
# Open as text: word_tokenize expects str, not bytes, under Python 3
# (the old 'rb' mode handed it a bytes object).
with open(poem, encoding='utf-8') as f:
    raw = f.read()

tokens = list(word_tokenize(raw))
words = [w.lower() for w in tokens]
vocab = sorted(set(words))
# NOTE(review): dropping the first 15 sorted entries assumes exactly that
# many punctuation tokens sort before the first real word -- fragile;
# verify against the actual token list.
vocab = vocab[15:]  # cut out the punctuation

corpus = WordAssembler(vocab)
for i in range(1000):
    # print() call for consistency with the python-3 scripts in this file.
    print(corpus.assemble_word(min_length=5))
# Contrast three common English words with three obscure ones, all
# Queneau-assembled from the two word-list corpora.
import json
from queneau import WordAssembler
import textwrap

# Context managers so the JSON files are closed instead of leaked.
with open("data/english_words.common.json") as f:
    common_corpus = WordAssembler(json.load(f))
with open("data/english_words.json") as f:
    full_corpus = WordAssembler(json.load(f))

# print() calls (not the py2 statement) for consistency with the
# python-3 scripts elsewhere in this file.
print('You know "%s", "%s", and "%s".' % tuple(
    common_corpus.assemble_word() for i in range(3)))
print('But have you heard of "%s", "%s", or "%s"?' % tuple(
    full_corpus.assemble_word() for i in range(3)))
# Corpus-based variant of the common-vs-obscure word generator.
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# NOTE(review): both assemblers load the same "english_words" corpus, so
# the "common" vs "obscure" contrast is illusory here -- the JSON-file
# variant of this script loads english_words.common for the first one.
# Confirm the intended corpus name before changing it.
common_corpus = WordAssembler(Corpus.load("english_words"))
full_corpus = WordAssembler(Corpus.load("english_words"))

# print() calls (not the py2 statement) for consistency with the
# python-3 scripts elsewhere in this file.
print('You know "%s", "%s", and "%s".' % tuple(
    common_corpus.assemble_word() for i in range(3)))
print('But have you heard of "%s", "%s", or "%s"?' % tuple(
    full_corpus.assemble_word() for i in range(3)))
# Generate 100 fake minor-planet entries: each gets a Queneau-assembled
# name and a wrapped paragraph stitched from real citation sentences.
from queneau import Assembler, WordAssembler
import textwrap

# Context manager so the corpus file is closed instead of leaked.
with open("data/minor_planets.min.json") as f:
    corpus = Assembler.load(f, tokens_in='citation')

how_many = 100
for i in range(how_many):
    sentences = []
    names = []
    for sentence, source in corpus.assemble("f.l", min_length=3):
        sentences.append(sentence)
        names.append(source['name'])
    # Make a new assembler from the names of the asteroids that were chosen.
    name_assembler = WordAssembler(names)
    name = name_assembler.assemble_word()
    # print() calls (not the py2 statement) for consistency with the
    # python-3 scripts elsewhere in this file.
    print(name)
    for s in textwrap.wrap(" ".join(sentences)):
        print(s)
    # Blank line between entries, but not after the last one.
    if i < how_many - 1:
        print()
# Print 25 exhortations to smoke an invented cannabis strain.
import random
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

c = Corpus.load("cannabis")
assembler = WordAssembler(c)

# "Hit the" appears twice -- kept as-is, since the duplicate doubles its
# selection weight (presumably intentional; confirm).
verbs = ["Puff on that", "Puff the", "Smoke that", "Smokin on the",
         "Smoke on that", "Smoke on this", "Hit the", "Take a toke of the",
         "Pass the", "Hit the", "Hit this", "Blaze up that", "Blaze on that",
         "Fire up that", "Light up the", "Blaze some"]

for _ in range(25):
    # random.choice is the idiomatic, equivalent form of
    # verbs[random.randint(0, len(verbs) - 1)].
    print("%s %s" % (random.choice(verbs), assembler.assemble_word()))
from queneau import Assembler, WordAssembler
import textwrap
from corpus import Corpus

# Emit 100 invented minor planets: an assembled name followed by a
# wrapped paragraph built from citation sentences.
corpus = Assembler.loadlist(Corpus.load("minor_planets"), tokens_in='citation')

how_many = 100
for entry_index in range(how_many):
    sentences = []
    names = []
    for sentence, source in corpus.assemble("f.l", min_length=3):
        sentences.append(sentence)
        names.append(source['name'])
    # Derive a plausible new name from the names of the chosen asteroids.
    print(WordAssembler(names).assemble_word())
    paragraph = " ".join(sentences)
    for wrapped_line in textwrap.wrap(paragraph):
        print(wrapped_line)
    # Blank separator between entries, but not after the last one.
    if entry_index < how_many - 1:
        print()
# Contrast three common English words with three obscure ones, all
# Queneau-assembled from the two word-list corpora.
import json
from queneau import WordAssembler
import textwrap

# Context managers so the JSON files are closed instead of leaked.
with open("data/english_words.common.json") as f:
    common_corpus = WordAssembler(json.load(f))
with open("data/english_words.json") as f:
    full_corpus = WordAssembler(json.load(f))

# print() calls (not the py2 statement) for consistency with the
# python-3 scripts elsewhere in this file.
print('You know "%s", "%s", and "%s".' % tuple(
    common_corpus.assemble_word() for i in range(3)))
print('But have you heard of "%s", "%s", or "%s"?' % tuple(
    full_corpus.assemble_word() for i in range(3)))
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Announce 50 fictitious NSA codename replacements, two assembled
# codenames per line.
project_corpus = Corpus.load("nsa_projects")
codename_maker = WordAssembler(project_corpus['codenames'])

for _ in range(50):
    old_name, new_name = (
        codename_maker.assemble_word(pattern="011.", length=5)
        for _ in range(2))
    print("%s has been replaced by %s" % (old_name, new_name))