def test_get_file(self):
    """get_file must load JSON corpora (including subdirectories) as dicts."""
    import pycorpora
    data = pycorpora.get_file('pycorpora_test', 'test')
    self.assertEqual(type(data), dict)
    self.assertEqual(data['tests'], ["one", "two", "three"])
    subdata = pycorpora.get_file('pycorpora_test/subdir', 'another_test')
    self.assertIsNotNone(subdata)
    # Bug fix: the original re-asserted on `data` here (copy-paste), so the
    # subdirectory payload was never actually checked.
    self.assertEqual(type(subdata), dict)
    # NOTE(review): assumes the subdir fixture mirrors the top-level one —
    # confirm against data/pycorpora_test/subdir/another_test.json.
    self.assertEqual(subdata['tests'], ["one", "two", "three"])
def test_cache(self):
    """Loaded corpora must be memoized in pycorpora.cache, keyed by file path."""
    import pycorpora
    key = 'data/pycorpora_test/test.json'
    # Cache starts cold for this key.
    self.assertNotIn(key, pycorpora.cache)
    pycorpora.get_file('pycorpora_test', 'test')
    self.assertIn(key, pycorpora.cache)
    # Attribute-style access should populate the cache as well.
    nested = pycorpora.pycorpora_test.subdir.another_test
    self.assertIsNotNone(nested)
    self.assertIn('data/pycorpora_test/subdir/another_test.json',
                  pycorpora.cache)
def get_random_corpus():
    """Pick a random corpora file and mangle its description into a title.

    Returns a tuple ``(title, corpus)``: ``title`` is a markdown-ish heading
    built from the file's "description" value with "list"/"List" replaced by
    "Marriage"; ``corpus`` is the last non-description value coerced to a
    list.

    Side effect: assigns the module-level ``title`` global (preserved for
    compatibility with existing callers that read it).

    NOTE(review): preserved original hazards — if the chosen file lacks a
    "description" key the stale global ``title`` is returned, and if it has
    *only* a description, ``corpus`` is never bound (NameError).
    """
    category = choice(pc.get_categories())
    # Renamed from `dict`, which shadowed the builtin.
    data = pc.get_file(category, choice(pc.get_files(category)))
    global title
    for key, value in data.items():
        if key == 'description':
            if 'List' in value or 'list' in value:
                value = value.replace('list', 'Marriage')
                value = value.replace('List', 'Marriage')
                title = '##' + value
            else:
                title = '##The Marriage of ' + value
        else:
            # Last non-description entry wins (original behavior).
            corpus = list(value)
    return title, corpus
def main():
    # TODO: Replace with better, sex-separated data
    first_names = pycorpora.get_file('humans', 'firstNames')['firstNames']
    namor = SoundNamor(first_names)
    try:
        # Keep generating names until the user declines or interrupts.
        while True:
            answer = input('Generate name?')
            if answer.lower() in ('n', 'no', 'q', 'quit'):
                break
            sex = random.choice(['F', 'M'])
            name = ''.join(namor.generate_name(sex))
            print(sex, ':', name)
    except KeyboardInterrupt:
        pass
def get_tarot_cards(astrological_sign: str) -> List[str]:
    """Return the tarot interpretation assigned to ``astrological_sign``.

    On first call, lazily populates the module-level ``ASTRO_TAROT`` cache:
    each tarot card is mapped to the zodiac sign whose trait keywords score
    *lowest* under ``word_similarity`` (min of the per-sign averages).

    NOTE(review): assumes ``get_tarot_keywords`` never returns an empty
    list, otherwise ``avg /= len(keywords)`` raises ZeroDivisionError.
    Raises KeyError if no card was assigned to the requested sign.
    """
    if not ASTRO_TAROT:
        tarots = pycorpora.get_file(
            'divination', 'tarot_interpretations')['tarot_interpretations']
        for tarot in tarots:
            # Hoisted out of the sign loop: a card's keywords do not depend
            # on the sign being scored (original recomputed this per sign).
            keywords = get_tarot_keywords(tarot['name'])
            signs: Dict[str, float] = {}
            for sign, traits in zip(
                    ZODIAC.values(),
                    map(get_astrological_traits, ZODIAC.values())):
                avg: float = 0
                for keyword in keywords:
                    avg += sum(
                        map(lambda x: word_similarity(keyword, x), traits))
                avg /= len(keywords)
                signs[sign] = avg
            ASTRO_TAROT[min(signs.items(), key=lambda x: x[1])[0]] = tarot
    return ASTRO_TAROT[astrological_sign]
# NOTE(review): fragment of a larger dict literal (a Tracery-style grammar
# lexicon mapping symbols to pycorpora word lists); the dict's opening brace
# and the final list's closing bracket lie outside this chunk.
        pycorpora.materials.get_file("natural-materials")["natural materials"]
    ],
    "drunkeness":
    pycorpora.words.states_of_drunkenness.get("states_of_drunkenness"),
    "personal_noun": pycorpora.words.personal_nouns.get("personalNouns"),
    "person_description": pycorpora.humans.descriptions.get("descriptions"),
    "occupation": pycorpora.humans.occupations.get("occupations"),
    "mood": pycorpora.humans.moods.get("moods"),
    # Diagnoses keep only their human-readable description strings.
    "diagnosis":
    [c.get("desc") for c in pycorpora.medicine.diagnoses.get("codes")],
    "greek_titans":
    pycorpora.get_file("mythology/greek_titans")["greek_titans"],
    "vegetable": pycorpora.foods.vegetables.get("vegetables"),
    "fruit": pycorpora.foods.fruits.get("fruits"),
    "wine_taste": pycorpora.foods.wine_descriptions.get("wine_descriptions"),
    # Condiments are lowercased for mid-sentence use.
    "condiment":
    [c.lower() for c in pycorpora.foods.condiments.get("condiments")],
    "knot": pycorpora.technology.knots.get("knots"),
    # Tagged alternatives (<+feature ...>) presumably expanded by the grammar
    # engine elsewhere — confirm against the consumer of this lexicon.
    "unusual_thing": [
        "<+feature gemstone>#gemstone#</+>",
        "<+feature fauna>#common_animal#</+>",
        "<+feature fruit flora>#fruit#</+>",
        "<+feature vegetable flora>#vegetable#</+>"
def get_astrological_traits(astrological_sign: str) -> List[str]:
    """Look up the trait keywords for a western zodiac sign (case-insensitive)."""
    zodiac = pycorpora.get_file('divination', 'zodiac')['western_zodiac']
    return zodiac[astrological_sign.capitalize()]['keywords']
# NOTE(review): this chunk opens mid-function — the lines down to the first
# `return` are the tail of a get_tarot_cards() definition whose `def` line is
# outside this view.
            avg: float = 0
            for keyword in keywords:
                avg += sum(
                    map(lambda x: word_similarity(keyword, x), traits))
            avg /= len(keywords)
            signs[sign] = avg
        # The card goes to the sign with the *lowest* average similarity.
        ASTRO_TAROT[min(signs.items(), key=lambda x: x[1])[0]] = tarot
    return ASTRO_TAROT[astrological_sign]


# Card name -> keyword list, built once at import time from the corpora file.
tarot_keywords = {
    x['name']: x['keywords']
    for x in pycorpora.get_file('divination', 'tarot_interpretations')
    ['tarot_interpretations']
}


@lru_cache()
def get_tarot_keywords(tarot_card: str) -> List[str]:
    """Return the keywords for ``tarot_card`` (KeyError if unknown)."""
    return tarot_keywords[tarot_card]


# Name pools: NLTK's names corpus merged with faker's first-name providers.
# NOTE(review): nltk.download runs at import time (network side effect).
nltk.download('names')
FIRST_NAMES_FEMALE = frozenset(
    names.words('female.txt') + list(Provider.first_names_female))
FIRST_NAMES_MALE = frozenset(
    names.words('male.txt') + list(Provider.first_names_male))
def __init__(self, input_texts: str):
    """Build the NLP pipeline, load pattern tables, and pre-resolve synonyms.

    NOTE(review): the annotation says ``str`` but ``input_texts`` is iterated
    and each element is sentence-tokenized — likely ``Iterable[str]``; confirm
    with callers.

    Side effects: downloads an NLTK model, loads a spaCy model, reads
    src/syns.yaml and src/spreadr_shreddr/data.yaml, queries the Datamuse
    API, and appends newly resolved synonyms back to src/syns.yaml.
    """
    nltk.download('punkt')
    self.nlp = spacy.load('en_core_web_lg')
    self.summarizer = LsaSummarizer(Stemmer('english'))
    self.summarizer.stop_words = get_stop_words('english')
    self.cleaner = CleaningProcessor()
    # Synonym cache: "<word><sep><tag>" -> list of synonyms, or None when
    # lookup was attempted and produced nothing usable.
    self.synonyms: Dict[str, Optional[List[str]]] = {}
    if path.isfile('src/syns.yaml'):
        with open('src/syns.yaml', 'r') as f:
            self.synonyms = yaml.safe_load(f)
        # An empty YAML file loads as None — normalize to an empty dict.
        if self.synonyms is None:
            self.synonyms = {}
    # Replacement tables; insertion order matters, hence OrderedDict.
    self.patterns: Dict[str, str] = OrderedDict()
    self.rev_patterns: Dict[str, str] = OrderedDict()
    with open('src/spreadr_shreddr/data.yaml', 'r') as f:
        data = yaml.safe_load(f)
    self.patterns.update(data['shorten'])
    self.patterns.update(data['expand'])
    # Filler words (plus corpora name prefixes) map to empty string, i.e.
    # they are deleted when the patterns are applied.
    data['filler'].extend(
        pycorpora.get_file('humans', 'prefixes')['prefixes'])
    self.patterns.update({k: '' for k in data['filler']})
    # "first second" -> "compoundword", without clobbering existing entries.
    for obj in pycorpora.get_file('words', 'compounds')['compounds']:
        key = '{} {}'.format(obj['firstWord'], obj['secondWord'])
        if key not in self.patterns:
            self.patterns[key] = obj['compoundWord']
    # Mirror every pattern with a capitalized variant.
    self.patterns.update(
        {k.capitalize(): v.capitalize()
         for k, v in self.patterns.items()})
    self.brits = data['brit_am']
    self.murcans = {v: k for k, v in self.brits.items()}
    changed = False
    api = Datamuse()
    for text in input_texts:
        # `>>=` is presumably overloaded by CleaningProcessor — confirm.
        text >>= self.cleaner
        for sent in sent_tokenize(text):
            for index, word in enumerate(self.nlp(sent)):
                orth = word.orth_.lower()
                # NOTE(review): self.separator is defined outside this view
                # (presumably a class attribute).
                key = self.separator.join((orth, word.tag_))
                if key not in self.synonyms:
                    changed = True
                    syns: List[str] = []
                    # Only query Datamuse for unambiguous words (<= 1 synset)
                    # whose POS the API understands.
                    if (word.pos_ in UNIVERSAL_TO_DATAMUSE
                            and len(wn.synsets(orth)) <= 1):
                        res = api.words(ml=orth)
                        if len(res) > 0:
                            syns = self._get_synonyms(
                                ' '.join(sent), (index, word), res)
                    # None marks "looked up, nothing usable" so we don't retry.
                    if len(syns) > 1:
                        self.synonyms[key] = syns
                    else:
                        self.synonyms[key] = None
        # NOTE(review): nesting inferred — this appears to run once per text.
        # It appends only the *last* `key` processed, which looks like a bug
        # (earlier new keys are never persisted); confirm intent before fixing.
        if changed:
            changed = False
            with open('src/syns.yaml', 'a') as f:
                f.write(yaml.dump({key: self.synonyms[key]}))
def assemble(): # actually make the thing
    """Generate the whole book: chapters, front matter, TOC, end matter.

    Python 2 code (print statements). Mutates the module-level `credits`
    accumulator and writes HTML pages via tpl(). Page bookkeeping:
    front matter occupies pages 1-8, so page_counter starts at 6 and each
    chapter begins two pages after the previous one ends.
    """
    global credits
    # Shared accumulator filled by prepare_chapter() and consumed by the
    # TOC / preface / credits pages below.
    credits = {
        'nouns':[],
        'houses':[],
        'characters':[],
        'concepts':[],
        'raw_txt':'',
        'chapter_count':7,
        'chapter_titles':[],
        'character_icons':[]
    }
    page_counter = 6
    # first, make the chapters
    animals = pycorpora.get_file("animals","common")['animals']
    for chapter_counter in range(credits['chapter_count']):
        result = 0
        # Retry until stack() yields a usable concept chain for some animal.
        # NOTE(review): nesting inferred — `del` appears to remove each tried
        # animal (success or not) so it is never reused; confirm.
        while (result == 0):
            animal = random.randrange(0,len(animals))
            result = stack(animals[animal], 55)
            del animals[animal]
        a_chapter = prepare_chapter(result,int(page_counter) + 2,int(chapter_counter) + 1)
        # prepare_chapter returns the last page number it wrote.
        page_counter = a_chapter
    # to make a chapter
    # pick an animal to stack
    # make sure it created a valid set
    # prepare_chapter should return a number, the next chapter should start two pages after, so 10 + 2 = 12, e.g.
    # prepare the frontmatter
    # 1r = frontcover
    # 2l = blank
    # 3r = title page
    # 4l = dedication
    # 5r = toc
    # 6l = blank
    # 7r = introduction
    # 8l = blank, chapter 1 page 0
    booktitle = book_title()
    print "This book is called " + booktitle
    tpl("templates/frontcover.html","pages/00001r.html",[("book_title",booktitle)])
    # blank page
    tpl("templates/template.html","pages/00002l.html",[("","")])
    # title page
    tpl("templates/titlepage.html","pages/00003r.html",[("book_title",booktitle)])
    # dedication page: one icon per kid, with inline style attributes stripped.
    kid_icons = '<div id="kids">'
    for kid in ["Cecily","Daniel","Serena","Wendy"]:
        a_kid = re.sub(r"style=\".+?\"","",get_icon(kid,"333333"))
        kid_icons += a_kid
    kid_icons += "</div>"
    tpl("templates/dedication.html","pages/00004l.html",[("kids",kid_icons)])
    # toc -- this one has to be a bit more manual
    # chapter_titles entries are (title, absolute_page); -8 converts to the
    # reader-visible page number (front matter offset).
    toc = ''
    for ch in range(len(credits['chapter_titles'])):
        toc_string = '<div class="toc-entry"><span>Chapter ' + str(ch + 1) + '</span><span>' + credits['chapter_titles'][ch][0] + '</span><span>' + str(credits['chapter_titles'][ch][1] - 8) + '</span></div>'
        toc += toc_string
    toc += '<div class="toc-entry"><span> </span><span>Credits</span><span>' + str(int(page_counter) - 3) + '</span></div>'
    tpl("templates/toc.html","pages/00005r.html",[("toc",toc)])
    # blank page
    tpl("templates/template.html","pages/00006l.html",[("","")])
    # introduction: counts quantified into English via quantify()
    housecount = quantify("house",amount=len(credits['chapter_titles']))
    peoplecount = quantify("person",amount=len(credits['characters']))
    tpl("templates/preface.html","pages/00007r.html",[("house_count",housecount),("people_count",peoplecount),("character_names", ", ".join(list(credits['characters']))),("word_count",str(len(re.compile(r" +").split(credits['raw_txt']))))])
    # make a The End
    tpl("templates/last_page.html","pages/" + str(int(page_counter) + 2).zfill(5) + ".html",[("character_names", ", ".join(list(credits['characters'])))])
    tpl("templates/the_end.html","pages/" + str(int(page_counter) + 3).zfill(5) + ".html",[("character_icons"," ".join(list(credits['character_icons'])))])
    # make the credits
    make_credits(credits,str(int(page_counter) + 2))
    print "Generation complete"
def prepare_chapter(content,startpage,chapter_number):
    """Write one chapter's HTML pages ("This is the X that ..." cumulative rhyme).

    content: list of (concept, ...) pairs from stack(); content[0] is the
    chapter's anchor object. startpage: absolute page number where the
    chapter's blank left page goes. Returns the last page number written
    (as a string), which the caller uses to place the next chapter.
    Python 2 code; mutates the module-level `credits` accumulator.
    """
    global credits
    print content
    # put the list from stack() in reverse
    ordered = list(content[::-1])
    # this will hold the chapter text as it accumulates
    chapter = ''
    # this chooses a color palette for this chapter
    colors = pal()
    # pick a name for our main character
    jack = random.choice(pycorpora.get_file("humans","firstnames")['firstNames'])
    credits['characters'].append(jack)
    # get an image for this character; strip inline style for the in-page copy
    jack_icon_first = get_icon(jack,random.choice(colors),random.choice(["man","girl","boy","woman","baby","child","grandmother","dude"]))
    jack_icon = re.sub(r"style=\".+?\"","",jack_icon_first)
    credits["character_icons"].append(jack_icon_first)
    # chapter object
    chapter_object = content[0]
    # make a chapter title
    # this will make a title for the chapter
    chapter_title = str( jack + " and the " + str(chapter_object) + " that " + a(specify(pastify(content[1][0]))) )
    # startpage + 3 is where readable content begins (used by the TOC).
    credits['chapter_titles'].append((chapter_title, startpage + 3))
    # loop through each concept in the stack
    for page in range(len(ordered)):
        print "Working on page " + str(page)
        # the page number: left pages advance two at a time from startpage+2
        page_number = str( startpage + ((page * 2) + 2) )
        # a color for this concept
        color = random.choice(colors)
        # Last spread circles back to the chapter's anchor object.
        if (page == len(ordered) - 1):
            the_concept = chapter_object
        else:
            the_concept = pastify(specify(ordered[page][0]))
        # isolate the current object
        the_thing = the_concept.split(" the ")[-1]
        # find an icon. This returns an <img> tag for the icon
        the_icon_first = get_icon(the_thing,color)
        the_icon = re.sub(r"style=\".+?\"","",the_icon_first)
        if (page == 0):
            # at the beginning of the chapter
            # make the blank page
            tpl("templates/template.html", "pages/" + str(startpage).zfill(5) + "l.html",[])
            # make the chapter title page
            print "Making the title page "
            tpl("templates/chaptertitlepage.html", "pages/" + str(startpage + 1).zfill(5) + "r.html", [("chapter_number",str(chapter_number)),("chapter_title",chapter_title),("character_name",jack),("character_icon",str(jack_icon))])
            next_concept = ""
            # start the chapter string; "built." is weighted 7:4 over variants
            chapter = "that " + jack + " " + random.choice(["built.","built.","built.","built.","built.","built.","built.","built of brick.","divided into several rooms.","found in a neighborhood.","located on an estate."])
        else:
            # I don't know why this is going backwards?
            next_concept = "<span> that " + pastify(specify(ordered[page - 1][0])).split(" the ")[0] + " the </span>"
        # -6 converts the absolute page number to the reader-visible one.
        page_content = [
            ("pn",str(int(page_number) - 6)),
            ("chapter_content",chapter),
            ("icon",the_icon),
            ("first_line","This is the <span class='page-object' style='color:#" + color + "'>" + the_thing + "</span>" + next_concept)
        ]
        tpl("templates/template.html","pages/" + page_number.zfill(5) + "l.html",page_content)
        if (page == 0):
            # Right page of the first spread: a cached flickr house image.
            if (not os.path.isfile("images/house-" + str(chapter_number) + "-watercolor.jpg")):
                get_flickr_image("house",chapter_number)
            tpl("templates/rtemplate.html","pages/" + str(int(page_number) + 1).zfill(5) + "r.html",[("imagery",jack_icon_first + "<!-- imagery -->"),("pgbackground","<div class='pg' style='background-image: url(../images/house-" + str(chapter_number) + "-watercolor.jpg)'>")])
        elif (page >= 1):
            # Subsequent right pages reuse the previous right page as the
            # template, accumulating one icon per spread.
            tpl ("pages/" + str(int(page_number) - 1).zfill(5) + "r.html", "pages/" + str(int(page_number) + 1).zfill(5) + "r.html",[("imagery",the_icon_first + "<!-- imagery -->")])
        # prepend the next_concept variable to the chapter text before it loops again
        chapter = "<span class='page-object' style='color:#" + color + "'>" + the_thing + "</span>" + next_concept + chapter
    #raw_txt = str(chapter)
    # Strip HTML tags for the plain-text word-count copy in credits.
    raw_txt = re.sub('<[^<]+?>', '', chapter)
    credits['raw_txt'] += " This is the " + raw_txt
    return page_number
# NOTE(review): this chunk opens mid-function — the lines down to the second
# `return` are the tail of a synonym helper (apparently `syns(w)`, memoized in
# a module-level `synonyms` dict) whose def line is outside this view.
        for l in syn.lemmas():
            ret.append(l.name().replace("_", " "))
    ret.append(w)
    synonyms[w] = list(set(ret))
    if len(synonyms[w]) == 0:
        return [w]
    return synonyms[w]


def synexp(ws):
    """Expand every word in ws with its synonyms, deduplicated via set."""
    ax = []
    for w in ws:
        ax += syns(w)
    return list(set(ax))


# Gross-out vocabulary: corpora body fluids plus hand-picked extras,
# then synonym-expanded.
gross = (pycorpora.get_file(
    "materials", "abridged-body-fluids")["abridged body fluids"] + [
        "matted hair", "rotting meat", "maggots", "feces", "used chewing gum",
        "teeth", "fingers", "spiders", "snakes", "flies", "wasps", "bees",
        "yellowjackets", "worms"
    ])
gross = synexp(gross)
# Weighted material pool: everyday materials repeated x5 so they dominate
# the rarer flowers / body parts / stones / gross entries.
materials = (
    ((pycorpora.get_file("materials", "layperson-metals")["layperson metals"]
      + pycorpora.get_file("materials",
                           "natural-materials")["natural materials"] +
      pycorpora.materials["packaging"]["packaging"] + pycorpora.get_file(
          "materials", "plastic-brands")["plastic brands"]) * 5) +
    pycorpora.plants.flowers["flowers"] +
    pycorpora.humans.bodyParts["bodyParts"] +
    pycorpora.get_file("materials", "decorative-stones")["decorative stones"]
    + gross)
# NOTE(review): truncated — the `cities` expression continues past this chunk.
cities = (list(