/
phonemer.py
183 lines (138 loc) · 3.92 KB
/
phonemer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import sys, io
import markov
import pickle
import random
# Counter initialised to zero; not read anywhere in the visible script.
linecount = 0
# test_word = sys.argv[1]
# Path of the lyrics file to rewrite, taken from the first command-line
# argument.  Fail with a usage message instead of a bare IndexError when the
# argument is missing.
if len(sys.argv) < 2:
    sys.exit('usage: phonemer.py <lyrics_file>')
file_name = sys.argv[1]
# Lookup tables keyed by dictionary word; they start empty and are filled by
# the pronunciation-dictionary loop further down:
#   phoneme_dict  -> list of all phonemes of the word
#   syllable_dict -> string of stress digits, one per vowel phoneme
#   vowel_dict    -> list of the vowel phonemes (with their stress digit)
phoneme_dict = {}
syllable_dict = {}
vowel_dict = {}
first_word = 'It'
prev_word = 'It'
# n-gram size for markov
n = 1
def count_syls(phonemes):
    """Extract the stress pattern and the stressed phonemes of a word.

    Every phoneme that contains one of the digits 0, 1 or 2 contributes that
    digit to the stress string and is recorded in the vowel list.  A phoneme
    containing several of those digits is recorded once per digit.

    Returns a two-element list:
      [0] string of the digits 0/1/2 found, in phoneme order
      [1] list of the phonemes (including their digit) that carried a digit
    """
    stress_pattern = ''
    stressed_phonemes = []
    for phoneme in phonemes:
        for digit in '012':
            if digit in phoneme:
                stress_pattern += digit
                stressed_phonemes.append(phoneme)
    return [stress_pattern, stressed_phonemes]
# Build the per-word lookup tables from the pronunciation dictionary file.
# Each data line is "<WORD> <PHONEME> <PHONEME> ...".
# The file is opened in a `with` block so the handle is closed once the
# tables are built.
with open('cmudict-0.7b.txt', 'r') as cmudict_file:
    for line in cmudict_file:
        words = line.split()
        # Skip blank lines (which would crash on words[0]) and the ';;;'
        # comment lines of the CMU dictionary format, which are not words.
        if not words or words[0].startswith(';;;'):
            continue
        word = words[0]
        phonemes = words[1:]
        # count_syls returns [stress digit string, list of vowel phonemes]
        syls, vowels = count_syls(phonemes)
        syllable_dict[word] = syls
        phoneme_dict[word] = phonemes
        vowel_dict[word] = vowels
# Convert the song lyrics to syllable/stress strings.
#   song_lines: the raw text of the lyrics file
#   song_syls:  for each input line, the concatenated stress digits of every
#               word found in syllable_dict, followed by '\n'
# Pieces are collected in lists and joined once to avoid repeated string
# concatenation.
song_line_parts = []
song_syl_parts = []
with open(file_name, 'r') as song_file:
    for line in song_file:
        song_line_parts.append(line)
        for word in line.split():
            # Upper-case to match the dictionary keys, and drop everything
            # that is neither a letter nor an apostrophe.  Compare with
            # `==`: identity (`is`) on string literals is not reliable.
            word = ''.join(
                ch for ch in word.upper() if ch.isalpha() or ch == "'"
            )
            try:
                song_syl_parts.append(str(syllable_dict[word]))
            except KeyError:
                # Unknown words contribute no syllables; report which one.
                print('no word: ' + word)
        song_syl_parts.append('\n')
song_lines = ''.join(song_line_parts)
song_syls = ''.join(song_syl_parts)
# print song_syls
# print phoneme_dict[test_word]
# print syllable_dict[test_word]
# print vowel_dict[test_word]
# print 'it is ' + str(syllable_dict["IT"])
# Load the pre-built markov model from disk.
# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only use a pickle that comes from a trusted source.
with open('yelp_markov_small.pickle', 'rb') as model_file:
    model = pickle.load(model_file)
def next_word(word):
    """Ask the markov model for the word following `word`.

    Calls markov.generate seeded with `word` for a single iteration and takes
    element [1] of the result as the next word.  While the result has fewer
    than two elements, or element [1] is blank after stripping, generation is
    retried without a seed.  The chosen word is printed and returned with
    surrounding whitespace stripped.
    """
    generated = markov.generate(model, n, seed=word, max_iterations=1)
    while not (len(generated) >= 2 and len(generated[1].strip()) >= 1):
        generated = markov.generate(model, n, max_iterations=1)
    chosen = generated[1].strip()
    print('next word: ' + chosen)
    return chosen
# def write_new_line(current)
def writenewline(current_line, prev_word):
    """Build one line of replacement lyrics for a line of the input song.

    current_line: string with one stress digit per syllable of the original
        line (as produced from syllable_dict values).
    prev_word: word used to seed the markov chain for the first pick.

    Words are drawn from the markov chain one at a time.  A word is accepted
    when it has strictly fewer syllables than remain in `current_line`; its
    syllables are then consumed from the front of `current_line` and it seeds
    the next pick.  Generation stops as soon as a drawn word has at least as
    many syllables as remain.

    Prints the number of syllables left unfilled and returns the new line: a
    leading space followed by each accepted word and a trailing space.
    """
    newsong_line = ' '
    c = len(current_line)
    while c > 0:
        # use markov function to choose a next word
        nextword = next_word(prev_word.upper())
        nextword_syl = syllable_dict.get(nextword, '')
        # A word that is missing from the pronunciation dictionary -- or that
        # is present but has no vowel phonemes, hence an empty stress string
        # -- cannot fill any syllable.  Reseed from a random model key and
        # draw again.  (Retrying the same empty-syllable word, as before,
        # would never terminate.)
        while not nextword_syl:
            # list(...) keeps random.choice working where keys() is a view.
            # Keys are indexed with [0], so they are presumably tuples of
            # words -- TODO confirm against the markov module.
            seed = random.choice(list(model.keys()))[0].upper()
            nextword = next_word(seed)
            nextword_syl = syllable_dict.get(nextword, '')
        next_syl_count = len(nextword_syl)
        if c <= next_syl_count:
            # The word does not fit strictly inside what is left: stop here.
            break
        # NOTE: only the syllable count is enforced.  The earlier code also
        # walked the stress digits of the word against current_line but
        # discarded the result of that comparison, so stress never affected
        # the choice; that no-op loop has been removed.
        current_line = current_line[next_syl_count:]
        newsong_line += nextword + ' '
        c = len(current_line)
        prev_word = nextword
    print('c = ' + str(c))
    return newsong_line
# writenewline("110010001010100", "THEY")
# Generate one replacement line per line of the syllable transcript.  Lines
# with no syllables become empty lines so that line indices in new_lyrics
# stay aligned with the original song.
new_lyrics = ''
for line in song_syls.split('\n'):
    if len(line) > 0:
        # Seed each line from a random model key.  list(...) is required for
        # random.choice on Python 3, where keys() is a view, not a sequence.
        start_word = random.choice(list(model.keys()))[0]
        new_line = writenewline(line, start_word) + '\n'
        new_lyrics += new_line
    else:
        new_lyrics += '\n'
# new_lyrics += '\n'
# print new_lyrics
# Print every line of the original lyrics followed by the generated line that
# sits at the same index in new_lyrics.
# NOTE(review): indentation was lost in this copy of the file; confirm whether
# the final print '\n' belongs inside the loop or after it.
# NOTE(review): new_lyrics.split('\n') is evaluated again each time it is
# indexed; splitting once before the loop would give the same lines.
index = 0
for line in song_lines.split('\n'):
print 'ORIG: ' + line
print 'NEW: ' + new_lyrics.split('\n')[index]
index += 1
print '\n'
# prev_word = "it"
# nextword = next_word(prev_word.upper())[1]
# print nextword
# print syllable_dict[nextword]
# print next_word(prev_word.upper())
# TO DO: create signature for lyrics
# for line in open(file_name, 'r'):