-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
355 lines (283 loc) · 10.9 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
"""
Gannon Leech
CSCI 3725: Computational Creativity
Mission 6: Poetry Slam
Title: To bot or not to bot
Last Modified: 4/17/21
Description: This is the main class for my poetry slam project.
This class contains most of the methods needed to run the program, including
the poetry generation functions, the functions to run the genetic algorithms,
and the functions to save out and speak the poem.
Known Bugs: There are no known bugs
"""
from FileDictionary import fileDictionary
from WordDictionary import wordDictionary
from poem import Poem
import random as rand
import datetime
import os
import re
from gtts import gTTS
def main():
    """
    Entry point: builds the n-gram model from a user-chosen template file,
    evolves a population of poems with a genetic algorithm, then speaks the
    fittest poem and saves it to disk.

    Prompts on stdin for the template file name (looked up under "inputs/")
    and for the n-gram size n.

    Raises ValueError if n is smaller than 1 or the template does not contain
    more than n words, because no n-gram could be built from it.
    """
    filename = "inputs/" + input("Please enter the file to use as a template ")
    n_value = int(input("Please enter what value for n "))
    my_word = wordDictionary()

    # split() already discards surrounding whitespace. The previous
    # strip('/n') removed the characters '/' and 'n' from the ends of the
    # text, which could eat the first letter of the opening word.
    # Every later use of a word is lower-cased, so normalise once here.
    words = [word.lower() for word in get_file_info(filename).split()]

    if n_value < 1 or len(words) <= n_value:
        raise ValueError(
            "the template must contain more than n words and n must be >= 1")

    # Parse the input into the ngrams model. Each key is n consecutive words,
    # every word followed by a single space.
    first_words = ""
    for i in range(len(words) - n_value):
        sequence = "".join(word + " " for word in words[i:i + n_value])
        if i == 0:
            # the opening n words seed the poem generation
            first_words = sequence
        next_word = words[i + n_value]
        my_dict = fileDictionary(sequence)
        my_dict.update_dict(next_word)
        my_word.add_dict(my_dict, next_word)

    # run the genetic algorithm methods
    population = seed_population(my_word, first_words)
    max_generations = 20
    for _ in range(max_generations):
        for poem in population:
            calc_fitness(poem, my_word)
        breeding_pool = select_breeding_pool(population)
        population = crossover(breeding_pool)
        population = [mutate(poem, my_word, n_value) for poem in population]

    # The last generation was produced by mutate() and has not been scored
    # yet; score it so find_max compares real fitness values.
    for poem in population:
        calc_fitness(poem, my_word)

    # read out the best poem and save as a txt file
    best_poem = find_max(population)
    speak_poem(best_poem)
    write_out_poem(best_poem)
def find_max(population):
    """
    Picks the fittest poem out of a population.

    population (list): poem objects, each exposing a `fitness` attribute

    Returns the poem with the largest fitness; when several poems tie, the
    one that appears first in the list wins. An empty population raises
    IndexError.
    """
    champion = population[0]
    for candidate in population[1:]:
        # strict comparison keeps the earliest poem on ties
        if candidate.fitness > champion.fitness:
            champion = candidate
    return champion
def get_file_info(filename):
    """
    Reads the given file and returns its contents as one long line to
    make it easier to parse for n-grams.

    filename (string): the user inputted name of file to use as
    an inspiring set

    Returns a string in which every whitespace-separated word of the file is
    followed by exactly one space (so a non-empty result ends with a space,
    and an empty file gives "").
    """
    # `with` guarantees the handle is closed even if reading fails
    with open(filename, "r") as file:
        all_words = file.read().split()
    return "".join(word + " " for word in all_words)
def select_breeding_pool(population):
    """
    Builds the breeding pool for a generation by repeated two-way
    tournaments: two distinct poems are drawn at random and the one with the
    strictly higher fitness is kept (the second draw wins ties). This is
    repeated until the pool is as large as the incoming population.

    population (list): poem objects representing the current generation

    Returns a list of poem objects; the same poem may appear more than once.
    """
    size = len(population)
    winners = []
    for _ in range(size):
        first, second = rand.sample(range(size), 2)
        challenger = population[first]
        defender = population[second]
        if challenger.fitness > defender.fitness:
            winners.append(challenger)
        else:
            winners.append(defender)
    return winners
def crossover(population):
    """
    Performs crossover on a given generation of poems. Poems are taken in
    consecutive pairs and each pair produces two children: one with the first
    parent's first half and the second parent's second half, and one with
    the opposite combination.

    population (list): A list representing the breeding pool for the
    current generation

    Returns a new list of poems the same length as the input. If the pool
    has an odd number of poems, the final unpaired poem is carried over
    unchanged instead of raising IndexError.
    """
    new_population = []
    # Stop one short of the end so an unpaired last poem is never indexed
    # at i + 1; for even-sized pools this visits exactly the same pairs.
    for i in range(0, len(population) - 1, 2):
        parent_a = population[i]
        parent_b = population[i + 1]

        new_poem1 = Poem()
        new_poem1.first_half = parent_a.first_half
        new_poem1.second_half = parent_b.second_half

        new_poem2 = Poem()
        new_poem2.first_half = parent_b.first_half
        new_poem2.second_half = parent_a.second_half

        new_population.append(new_poem1)
        new_population.append(new_poem2)

    if len(population) % 2 == 1:
        new_population.append(population[-1])
    return new_population
def seed_population(my_word, first_words):
    """
    Generates the starting population of poems, asking the n-gram model for
    each next word based on the previous sequence of words.

    my_word: the n-gram model; only its get_next_word(sequence, word) method
    is used here (presumably a wordDictionary - confirm against caller)
    first_words (string): a string representing the first n words of
    the inspiring text, which are used to start the poem generation

    Returns a list of 20 Poem objects.
    """
    poems = []
    # Text gathered for the half currently being built. It is NOT reset
    # between poems, so leftover words carry into the next poem.
    pending_text = first_words
    # Sliding window of recent words handed to the model as the sequence.
    context = first_words

    # generates 20 poems for a generation
    for _ in range(20):
        poem = Poem()
        # each poem is 50 words
        for position in range(50):
            if pending_text:
                random_word = rand.choice(pending_text.split())
            else:
                random_word = context.split()[0]
            new_word = my_word.get_next_word(context, random_word) + " "
            pending_text += new_word
            if position % 10 == 0:
                pending_text += '\n'

            # for the purposes of crossbreeding, splits the poem in half
            if len(pending_text.split()) == 25:
                if position > 25:
                    poem.second_half = pending_text
                else:
                    poem.first_half = pending_text
                pending_text = ""

            # slide the window: drop the oldest word, append the newest
            kept_words = context.split()[1:]
            context = "".join(word + " " for word in kept_words) + new_word
        poems.append(poem)
    return poems
def calc_fitness(poem, word_dictionary):
    """
    Scores a single poem and stores the result on `poem.fitness`.

    The score is the likelihood score of the full text (both halves
    concatenated) minus its variability penalty.

    poem (Poem): the poem to score; its `fitness` attribute is overwritten
    word_dictionary (word_dictionary): the n-gram dictionary used to judge
    how realistic the poem is
    """
    full_text = poem.first_half + poem.second_half
    repetition_penalty = calc_variability(full_text)
    realism = calc_likelihood(full_text, word_dictionary)
    poem.fitness = realism - repetition_penalty
def calc_likelihood(poem, word_dictionary):
    """
    Computes the likelihood score of a poem's text from the n-gram model.

    For every word of the text, the value stored for that word in each
    entry's `fileDict` is added up (0 when the word is absent from an entry).

    poem (string): the text of the poem to measure
    word_dictionary (word_dictionary): the dictionary containing the
    n-gram information; its `dictionary` attribute maps sequences to
    entries that carry a `fileDict` mapping

    Returns the summed score (0 for empty text or an empty model).
    """
    entries = word_dictionary.dictionary
    return sum(
        entries[sequence].fileDict.get(token, 0)
        for token in poem.split()
        for sequence in entries
    )
def calc_variability(poem):
    """
    Computes the repetition penalty of a poem's text.

    Counts every position where a word is immediately followed by the same
    word, and charges 500 per occurrence.

    poem (string): the text of the poem to measure

    Returns the penalty as an int (0 when no word repeats back-to-back).
    """
    tokens = poem.split()
    adjacent_repeats = sum(
        1 for current, following in zip(tokens, tokens[1:])
        if current == following
    )
    return adjacent_repeats * 500
def mutate(poem, word_dictionary, n_value, mutation_prob=0.05):
    """
    Produces a mutated copy of a poem by running each half through
    get_words; the input poem is left untouched.

    poem (Poem): the given poem to mutate
    word_dictionary (word_dictionary): the dictionary containing the
    n-grams information
    n_value (int): the user inputted n value
    mutation_prob (float): the probability that an individual word would be
    mutated

    Returns a new Poem holding the mutated first and second halves.
    """
    mutated = Poem()
    # first half is processed before the second
    for half_name in ("first_half", "second_half"):
        original_half = getattr(poem, half_name)
        setattr(
            mutated,
            half_name,
            get_words(original_half, word_dictionary, n_value, mutation_prob),
        )
    return mutated
def get_words(poem_half, word_dictionary, n_value, mutation_prob):
    """
    Runs the mutation for a section of a poem by reselecting a word based
    on the n gram model with some small mutation probability.

    The first n and last n words are always copied unchanged; only the words
    in between can be replaced. A newline is inserted after every word whose
    index is a multiple of 10.

    poem_half (string): the given half of the poem to mutate
    word_dictionary (word_dictionary): the dictionary containing the
    n-grams information; only get_next_word(sequence, word) is used
    n_value (int): the user inputted n value
    mutation_prob (float): the probability that an individual word would
    be mutated

    Returns the rebuilt half as a string with every word followed by a
    space. A half with no words between its first n and last n words is
    returned with its words unchanged.
    """
    half_words = poem_half.split()
    total = len(half_words)

    # Nothing can be mutated when the fixed head and tail cover the whole
    # half. Returning early also avoids the IndexError / duplicated words
    # that overlapping head and tail slices used to produce.
    if total <= 2 * n_value:
        return "".join(word + " " for word in half_words)

    pieces = [word + " " for word in half_words[:n_value]]
    for k in range(n_value, total - n_value):
        if rand.uniform(0, 1) < mutation_prob:
            random_word = rand.choice(half_words)
            # The model's keys are built as n words, each followed by a
            # space; the lookup key must use the same format or it can
            # never match.
            # NOTE(review): the window is words k..k+n-1 (as before), not
            # the n words preceding k - confirm this is intended.
            sequence = "".join(
                word + " " for word in half_words[k:k + n_value])
            pieces.append(
                word_dictionary.get_next_word(sequence, random_word) + " ")
        else:
            pieces.append(half_words[k] + " ")
        if k % 10 == 0:
            pieces.append('\n')
    pieces.extend(word + " " for word in half_words[total - n_value:])
    return "".join(pieces)
def write_out_poem(poem):
    """
    Given a poem, writes it as a .txt file in the "outputs" folder.

    The file is named "Poem_<timestamp>.txt" and contains str(poem). The
    folder is created if it does not exist yet.

    poem (poem): the poem being written to a .txt file
    """
    os.makedirs("outputs", exist_ok=True)
    # str(datetime.now()) contains spaces and colons, which are awkward or
    # invalid in file names on some systems; use a portable timestamp.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    filename = "outputs/Poem_" + timestamp + ".txt"
    with open(filename, 'w') as file:
        file.write(str(poem))
def speak_poem(poem):
    """
    Given a poem, saves it as an mp3 and reads it out loud. For help, I
    used:
    https://towardsdatascience.com/easy-text-to-speech-with-python-bfb34250036e
    for help with gTTS and this link:
    https://stackoverflow.com/questions/34214139/python-keep-only-letters-in-string/34214187
    for help cleaning up the string to make it readable

    The mp3 is written to "output_audio/Poem<timestamp>.mp3" (the folder is
    created if needed). The text is then spoken with the `say` shell
    command.

    poem (poem): the poem to be read out loud; its first_half and
    second_half strings are concatenated
    """
    poem_lines = (poem.first_half + poem.second_half).rstrip()

    # Keep only ASCII letters and spaces. The range must be A-Z: the old
    # "A-z" range also matched [ \ ] ^ _ and the backtick, and a backtick
    # reaching the shell command below would trigger command substitution.
    full_poem = re.sub('[^a-zA-Z ]+', '', poem_lines)

    gts_obj = gTTS(text=poem_lines, lang="en", slow=False)
    os.makedirs("output_audio", exist_ok=True)
    # portable timestamp (no spaces or colons) and a real ".mp3" extension;
    # the old name ended in "...mp3" with no dot
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    name = "output_audio/Poem" + timestamp + ".mp3"
    gts_obj.save(name)

    # The argument is restricted to letters and spaces by the regex above,
    # so it is safe to hand to the shell. Skip the call when nothing is left
    # to say, so `say` is never run without an argument.
    if full_poem.strip():
        os.system("say " + full_poem)
# Run the poetry generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()