Example #1
0
for i in range(how_many):

    sentences = []
    names = []
    genres = []
    mechanics = []
    for line, source in corpus.assemble("0.l"):
        if no_punctuation_at_end.search(line):
            line += "."
        sentences.append(line)
        names.append(source['name'])
        genres.append([genre for id, genre in source.get('boardgamecategory', [])])
        mechanics.append([mechanic for id, mechanic in source.get('boardgamemechanic', [])])

    # Make assemblers for single- and multi-word names.
    single_word_assembler = WordAssembler()
    multi_word_assembler = Assembler()

    # Create a composite assembler that will choose single- and
    # multi-word names in appropriate proportion.
    name_assembler = CompositeAssembler([single_word_assembler, multi_word_assembler])
    for name in names:
        words = whitespace.split(name)
        if len(words) == 1:
            single_word_assembler.add(name)
        else:
            multi_word_assembler.add(words)
    assembler, choice = name_assembler.assemble()
    if assembler == single_word_assembler:
        separator = ''
    else:
Example #2
0
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap
# Build a word assembler over the "dinosaurs" corpus and announce a fight
# between two freshly assembled dinosaur names.
assembler = WordAssembler(Corpus.load("dinosaurs"))

dinos = []
for _ in range(2):
    dino = assembler.assemble_word()
    # Pick the indefinite article from the first letter of the name.
    # NOTE(review): 'U' is not in the vowel set below -- confirm that is
    # intentional before changing it.
    if dino[0] in 'AEIO':
        dino = "an " + dino
    else:
        dino = "a " + dino
    dinos.append(dino)

# Parenthesised so the line is valid on Python 3 as well as Python 2; the
# bare `print` statement form is a SyntaxError on Python 3.
print("Look! Behind that ridge! It's %s fighting %s!" % tuple(dinos))
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Build a word assembler over the 'chemicals' entry of the toxic-chemicals
# corpus.
chemicals = Corpus.load("toxic_chemicals")['chemicals']
assembler = WordAssembler(chemicals)

# Print 50 lines, each made of two freshly assembled words.
for _ in range(50):
    words = [assembler.assemble_word() for _slot in range(2)]
    print("%s %s" % tuple(words))
Example #4
0
import json
from queneau import WordAssembler
from data import load_json
import textwrap
# Build a word assembler over the contents of dinosaurs.json and announce a
# fight between two freshly assembled dinosaur names.
assembler = WordAssembler(load_json("dinosaurs.json"))

dinos = []
for _ in range(2):
    dino = assembler.assemble_word()
    # Pick the indefinite article from the first letter of the name.
    # NOTE(review): 'U' is not in the vowel set below -- confirm that is
    # intentional before changing it.
    if dino[0] in 'AEIO':
        dino = "an " + dino
    else:
        dino = "a " + dino
    dinos.append(dino)

# Parenthesised so the line is valid on Python 3 as well as Python 2; the
# bare `print` statement form is a SyntaxError on Python 3.
print("Look! Behind that ridge! It's %s fighting %s!" % tuple(dinos))
Example #5
0
from queneau import WordAssembler
import nltk
from nltk.tokenize import WordPunctTokenizer
from textblob.tokenizers import word_tokenize

# Tokenize a poem, build a lower-cased vocabulary from it, and print 1000
# words assembled from that vocabulary.
poem = 'data/poems/book01.txt'

# NOTE(review): the file is read in binary mode, so on Python 3 `raw` is
# bytes rather than str -- confirm word_tokenize accepts that input.
with open(poem, 'rb') as f:
    raw = f.read()

# Alternative NLTK-based tokenization, left disabled:
### tokenizer = WordPunctTokenizer()
### tokens = tokenizer.tokenize(raw)
### tokens = nltk.word_tokenize(raw)
### text = nltk.Text(tokens)

tokens = list(word_tokenize(raw))

words = [w.lower() for w in tokens]

vocab = sorted(set(words))

# Drop the first 15 sorted entries; per the original author this cuts out
# the punctuation tokens (the count is specific to this input file).
vocab = vocab[15:]

corpus = WordAssembler(vocab)

for _ in range(1000):
    # Parenthesised so the line is valid on Python 3 as well as Python 2.
    print(corpus.assemble_word(min_length=5))

Example #6
0
import json
from queneau import WordAssembler
import textwrap
# Build one assembler from the common-word list and one from the full word
# list, then print three assembled words from each.
# The files are opened in `with` blocks so the handles are closed as soon as
# the JSON has been parsed instead of being left to the garbage collector.
with open("data/english_words.common.json") as common_file:
    common_corpus = WordAssembler(json.load(common_file))
with open("data/english_words.json") as full_file:
    full_corpus = WordAssembler(json.load(full_file))

# Parenthesised print calls are valid on both Python 2 and Python 3; the
# bare `print` statement form is a SyntaxError on Python 3.
print('You know "%s", "%s", and "%s".' % tuple(
    common_corpus.assemble_word() for i in range(3)))
print('But have you heard of "%s", "%s", or "%s"?' % tuple(
    full_corpus.assemble_word() for i in range(3)))
Example #7
0
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap
# Build two word assemblers and print three assembled words from each.
# NOTE(review): both assemblers are loaded from the same "english_words"
# corpus, so "common" and "full" draw on identical data -- confirm whether a
# separate common-words corpus was intended.
common_corpus = WordAssembler(Corpus.load("english_words"))
full_corpus = WordAssembler(Corpus.load("english_words"))

# Parenthesised print calls are valid on both Python 2 and Python 3; the
# bare `print` statement form is a SyntaxError on Python 3.
print('You know "%s", "%s", and "%s".' % tuple(
    common_corpus.assemble_word() for i in range(3)))
print('But have you heard of "%s", "%s", or "%s"?' % tuple(
    full_corpus.assemble_word() for i in range(3)))
Example #8
0
from queneau import Assembler, WordAssembler
import textwrap

# Generate `how_many` entries from the minor-planets data: each entry is an
# assembled name followed by a wrapped paragraph of assembled sentences.
# NOTE(review): the file object passed to Assembler.load is never closed
# explicitly; wrap it in `with` once it is confirmed that load() reads the
# file eagerly.
corpus = Assembler.load(open("data/minor_planets.min.json"),
                        tokens_in='citation')

how_many = 100
for i in range(how_many):

    sentences = []
    names = []
    for sentence, source in corpus.assemble("f.l", min_length=3):
        sentences.append(sentence)
        names.append(source['name'])

    # Make a new assembler from the names of the asteroids that were chosen.
    name_assembler = WordAssembler(names)
    name = name_assembler.assemble_word()
    # Parenthesised print calls are valid on both Python 2 and Python 3.
    print(name)
    for s in textwrap.wrap(" ".join(sentences)):
        print(s)

    # Blank line between entries, but not after the last one.
    if i < how_many - 1:
        # A bare `print` is a no-op expression on Python 3, and `print()`
        # prints "()" on Python 2; print("") emits one blank line on both.
        print("")
Example #9
0
import random
import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Print 25 lines, each a randomly chosen lead-in phrase followed by a word
# assembled from the "cannabis" corpus.
c = Corpus.load("cannabis")
assembler = WordAssembler(c)

# Lead-in phrases. "Hit the" appears twice, so it is chosen twice as often
# as the other phrases.
verbs = ["Puff on that",
         "Puff the",
         "Smoke that",
         "Smokin on the",
         "Smoke on that",
         "Smoke on this",
         "Hit the",
         "Take a toke of the",
         "Pass the",
         "Hit the",
         "Hit this",
         "Blaze up that",
         "Blaze on that",
         "Fire up that",
         "Light up the",
         "Blaze some"]

for _ in range(25):
    # random.choice picks a uniformly random element, replacing the manual
    # randint-based index arithmetic.
    print("%s %s" % (random.choice(verbs), assembler.assemble_word()))

Example #10
0
from queneau import Assembler, WordAssembler
import textwrap
from corpus import Corpus
# Generate `how_many` entries from the "minor_planets" corpus: each entry is
# an assembled name followed by a wrapped paragraph of assembled sentences,
# with a blank line between consecutive entries.
corpus = Assembler.loadlist(
    Corpus.load("minor_planets"), tokens_in='citation')

how_many = 100
for i in range(how_many):

    # Collect every (sentence, source) pair once, then split it into the
    # sentence texts and the names of the sources they came from.
    picked = list(corpus.assemble("f.l", min_length=3))
    sentences = [sentence for sentence, _source in picked]
    names = [source['name'] for _sentence, source in picked]

    # The entry's name is assembled from the names of the chosen asteroids.
    print(WordAssembler(names).assemble_word())

    paragraph = " ".join(sentences)
    for wrapped_line in textwrap.wrap(paragraph):
        print(wrapped_line)

    # Blank separator after every entry except the final one.
    if i != how_many - 1:
        print()
Example #11
0
import json
from queneau import WordAssembler
import textwrap
# Build one assembler from the common-word list and one from the full word
# list, then print three assembled words from each.
# The files are opened in `with` blocks so the handles are closed as soon as
# the JSON has been parsed instead of being left to the garbage collector.
with open("data/english_words.common.json") as common_file:
    common_corpus = WordAssembler(json.load(common_file))
with open("data/english_words.json") as full_file:
    full_corpus = WordAssembler(json.load(full_file))

# Parenthesised print calls are valid on both Python 2 and Python 3; the
# bare `print` statement form is a SyntaxError on Python 3.
print('You know "%s", "%s", and "%s".' % tuple(
    common_corpus.assemble_word() for i in range(3)))
print('But have you heard of "%s", "%s", or "%s"?' % tuple(
    full_corpus.assemble_word() for i in range(3)))

import json
from queneau import WordAssembler
from corpus import Corpus
import textwrap

# Build a word assembler over the 'codenames' entry of the NSA-projects
# corpus.
codenames = Corpus.load("nsa_projects")['codenames']
assembler = WordAssembler(codenames)

# Print 50 lines, each naming two freshly assembled codenames.
for _ in range(50):
    words = [
        assembler.assemble_word(pattern="011.", length=5)
        for _slot in range(2)
    ]
    print("%s has been replaced by %s" % tuple(words))

Example #13
0
for i in range(how_many):

    sentences = []
    names = []
    genres = []
    mechanics = []
    for line, source in corpus.assemble("0.l"):
        if no_punctuation_at_end.search(line):
            line += "."
        sentences.append(line)
        names.append(source['name'])
        genres.append([genre for id, genre in source.get('boardgamecategory', [])])
        mechanics.append([mechanic for id, mechanic in source.get('boardgamemechanic', [])])

    # Make assemblers for single- and multi-word names.
    single_word_assembler = WordAssembler()
    multi_word_assembler = Assembler()

    # Create a composite assembler that will choose single- and
    # multi-word names in appropriate proportion.
    name_assembler = CompositeAssembler([single_word_assembler, multi_word_assembler])
    for name in names:
        words = whitespace.split(name)
        if len(words) == 1:
            single_word_assembler.add(name)
        else:
            multi_word_assembler.add(words)
    assembler, choice = name_assembler.assemble()
    if assembler == single_word_assembler:
        separator = ''
    else: