def get_stresses(word):
    """Return the set of stress patterns known for ``word``.

    When ``word`` contains a space and splits into exactly two tokens that
    each have exactly one stress pattern, the two patterns are concatenated
    and returned as a one-element set.  In every other case the whole string
    is looked up as-is.
    """
    if ' ' in word:
        per_token = [
            pronouncing.stresses_for_word(token) for token in word.split()
        ]
        if len(per_token) == 2:
            first, second = per_token
            if len(first) == 1 and len(second) == 1:
                return {first[0] + second[0]}
    # Fallback: look the unsplit string up directly.
    return set(pronouncing.stresses_for_word(word))
def stresses_for_text(text):
    """Return the concatenated stress pattern of every word in ``text``.

    The text is lower-cased and split on whitespace.  Each word contributes
    the first stress pattern returned by ``pronouncing.stresses_for_word``.

    Returns:
        The joined pattern string.  ``''`` is returned as soon as any word
        has no stress pattern, and also when ``text`` contains no words.
    """
    line_stresses = []
    for word in text.lower().split():
        # Look the word up once and reuse the result (the original queried
        # the dictionary a second time just to read element 0).
        word_stresses = pronouncing.stresses_for_word(word)
        if not word_stresses:
            return ''
        line_stresses.append(word_stresses[0])
    return ''.join(line_stresses)
def evaluate(prime_str=['f**k'], predict_len=4, temperature=0.8):
    """Sample metered, rhyming lines from the module-level ``decoder``.

    Tokens are sampled one at a time and kept only when their stress pattern
    (passed through ``adjusted_meter``) is a prefix of what remains of the
    line's target meter ``"1010101010"``.  When a line's meter is used up,
    its last word becomes the rhyme target for the next line; the line after
    that tries to swap its own last word for a vocabulary word that rhymes
    with the target and has the same stress pattern, then clears the target.

    Args:
        prime_str: list of priming tokens.  It is copied, never modified.
        predict_len: number of ``"\n"`` tokens to produce before stopping.
        temperature: divisor applied to the decoder output before ``exp()``.

    Returns:
        A new list: the priming tokens followed by the generated tokens, with
        a ``"\n"`` token after each completed line.
    """
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    # Copy: the original aliased ``prime_str``, so ``predicted += ...`` grew
    # the shared default list and later calls started with stale tokens
    # (including enough "\n" entries to end the loop immediately).
    predicted = list(prime_str)
    target_meter = "1010101010"
    target_rhyme = ""

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    while predicted.count("\n") < predict_len:
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = vocab[top_i]

        if predicted_char.strip() == "":
            continue
        word_stresses = pronouncing.stresses_for_word(predicted_char)
        if not word_stresses:
            continue
        word_meter = adjusted_meter(word_stresses[0])
        if not target_meter.startswith(word_meter):
            continue

        # Accept the token and use it as the next input.
        predicted += [predicted_char]
        inp = char_tensor([predicted_char])
        target_meter = target_meter[len(word_meter):]

        if target_meter == "":
            # Line complete: optionally replace its last word with a rhyme.
            final_word = predicted.pop()
            if target_rhyme != "":
                meter_candidates = pronouncing.search_stresses(
                    pronouncing.stresses_for_word(final_word)[0])
                rhyme_candidates = pronouncing.rhymes(target_rhyme)
                candidates = list(
                    set(meter_candidates) & set(rhyme_candidates) & set(vocab))
                if candidates:
                    final_word = random.choice(candidates)
            predicted += [final_word, "\n"]
            # NOTE(review): ``inp`` still holds the popped word here, even if
            # a rhyme was substituted; evaluate_prob re-feeds ``final_word``.
            print(final_word)
            if target_rhyme == "":
                target_rhyme = final_word
            else:
                target_rhyme = ""
            target_meter = "1010101010"
        elif target_meter == "0":
            # Only one unstressed slot left: ask for a stressed one instead.
            target_meter = "1"
    return predicted
def japan_test():
    """Ad-hoc experiment on 'texts/Japan.txt' and one song line.

    Extracts keywords and a per-POS word table from the Japan text, prints
    the stresses of line 10 of 'songs/eye-of-the-tiger.txt', collects the
    keywords whose first stress pattern equals that of "survivor", and
    prints the POS-tagged form of that song line.
    """
    keywords = wikiwords.get_keywords_from_file_and_directory(
        'texts/Japan.txt', 'texts')
    japan_text = librarian.text_from_path('texts/Japan.txt')
    tagged_text = cfg.pos_tagged_from_text(japan_text)

    # Lower-case every word key of the per-tag frequency tables.
    pos_dict = {}
    for tag, frequencies in cfg.word_frequency_by_pos(tagged_text).items():
        pos_dict[tag] = {
            entry.lower(): count for entry, count in frequencies.items()
        }

    lines = librarian.lines_from_file('songs/eye-of-the-tiger.txt')
    song_line = lines[10]
    print(stresses_for_line(song_line))

    # "survivor"
    slot = pronouncing.stresses_for_word('survivor')[0]
    keyword_stresses = stress_map(keywords[:100])
    candidates = []
    for keyword, patterns in keyword_stresses.items():
        if len(patterns) > 0 and patterns[0] == ''.join(slot):
            candidates.append(keyword)

    tagged_line = cfg.pos_tagged_from_text(song_line.lower())
    print(tagged_line)
def swap(original_word, tag, candidate_words, pos_dict=None):
    """Pick a replacement with the same stress pattern as ``original_word``.

    Args:
        original_word: the word to replace.
        tag: key into ``pos_dict``; only read when ``pos_dict`` is truthy.
        candidate_words: iterable of possible replacements.
        pos_dict: optional mapping from tag to a container of lower-cased
            words; when given, candidates not found under ``tag`` are
            dropped.

    Returns:
        A random candidate whose first stress pattern equals the first
        stress pattern of ``original_word``.  ``original_word`` itself is
        returned when it has no known stress pattern or no candidate
        qualifies (the original returned ``None`` for unknown words).
    """
    stresses = pronouncing.stresses_for_word(original_word)
    if not stresses:
        # Unknown word: there is no pattern to match, so keep it.
        return original_word

    slot = ''.join(stresses[0])
    candidates = [
        word for word, patterns in stress_map(candidate_words).items()
        if patterns and patterns[0] == slot
    ]
    if pos_dict:
        candidates = [c for c in candidates if c.lower() in pos_dict[tag]]
    if candidates:
        return random.choice(candidates)
    return original_word
def buildMelodyByWord(tag_tuple, chord):
    """Add the melody for one tagged word to the module-level ``tree``.

    Args:
        tag_tuple: pair ``(word, POS tag)``.
        chord: passed through unchanged to the composition rule.

    One beat length is derived per character of the word's first stress
    pattern: ``1.0`` for stress ``0``, otherwise ``2 / stress``.  A word with
    no known stress pattern gets the single beat ``1.0``.  A token that is
    exactly one non-word character becomes a ``REST`` element; every other
    token is handed to ``composition_rules[part_of_speech]``.
    """
    # remember, tag_tuple is in the form of ([word],[POS tag])
    word, part_of_speech = tag_tuple

    strssptn = pr.stresses_for_word(word.lower())
    if strssptn:
        stress_values = [float(x) for x in strssptn[0]]
    else:
        stress_values = [0.0]
    beats = [1.0 if bt == 0.0 else 2 / bt for bt in stress_values]

    # Raw string: "\W" in a plain literal is an invalid escape sequence.
    if re.match(r"^\W$", word) is not None:
        etree.SubElement(tree, 'REST',
                         {'BEATS': ','.join([str(b) for b in beats])})
    else:
        print(part_of_speech)
        composition_rules[part_of_speech](word, beats, chord)
def do_default(word, beats, chord):
    """Append a ``DURATION``/``PITCH`` element pair for ``word`` to ``tree``.

    Args:
        word: the word; it becomes the text of the ``PITCH`` element.
        beats: beat lengths, written comma-separated to ``BEATS``.
        chord: not used by this rule.

    The notes come from ``getNotes(word, stresses, beats)`` and are written
    comma-separated to the ``NOTE`` attribute.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Single-argument call form prints the same text on Python 2 and 3.
    print("%s %s" % (word, strssptn))
    duration = etree.SubElement(
        tree, 'DURATION', {'BEATS': ','.join([str(b) for b in beats])})
    notes = getNotes(word, strssptn, beats)
    pitch = etree.SubElement(
        duration, 'PITCH', {'NOTE': ','.join([str(n) for n in notes])})
    pitch.text = word
def stresses(word):
    """Return one stress pattern for ``word``.

    The word is stripped of surrounding whitespace, lower-cased and stripped
    of surrounding punctuation before lookup.  If the dictionary has no entry
    the result is one ``"1"`` per syllable as counted by ``count_syllables``.
    """
    clean = word.strip().lower().strip(string.punctuation)
    known = pronouncing.stresses_for_word(clean)
    if known:
        return known[0]
    # Unknown word: treat every syllable as stressed.
    return "".join(["1" for _ in range(count_syllables(clean))])
def compose_verb(word, beats, chord):
    """Append a ``DURATION``/``PITCH`` element pair for a verb to ``tree``.

    Args:
        word: the word; it becomes the text of the ``PITCH`` element.
        beats: beat lengths; each is multiplied by 1.5 before being written.
        chord: sequence of note names; one is drawn at random per beat.

    Every note is written as the note name followed by the module-level
    ``octaveNumber``.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Single-argument call form prints the same text on Python 2 and 3.
    print("%s %s" % (word, strssptn))
    new_beats = [x * 1.5 for x in beats]
    noteList = [
        "%s%s" % (random.choice(chord), str(octaveNumber)) for _ in beats
    ]
    duration = etree.SubElement(
        tree, 'DURATION', {'BEATS': ','.join([str(b) for b in new_beats])})
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join(noteList)})
    pitch.text = word
def compose_adj(word, beats, chord):
    """Append a ``DURATION``/``PITCH`` element pair for an adjective to ``tree``.

    Args:
        word: the word; it becomes the text of the ``PITCH`` element.
        beats: beat lengths; each is multiplied by 3 before being written.
        chord: sequence of note names; at most one per beat is used, taken
            from the front of the chord.

    All notes share one octave: the module-level ``octaveNumber`` shifted by
    -1 or +1, chosen at random.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Single-argument call form prints the same text on Python 2 and 3.
    print("%s %s" % (word, strssptn))
    new_beats = [x * 3 for x in beats]
    octaveChange = octaveNumber + random.choice([-1, 1])
    noteList = [
        "%s%s" % (c, str(octaveChange)) for c in chord[:len(new_beats)]
    ]
    duration = etree.SubElement(
        tree, 'DURATION', {'BEATS': ','.join([str(b) for b in new_beats])})
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join(noteList)})
    pitch.text = word
def find_stress(line):
    """Return the stress string for ``line``, built word by word.

    Each whitespace-separated word is stripped of the characters
    ``",.?!;:`` and lower-cased.  A word contributes ``'0'`` when any of its
    stress patterns is exactly ``'0'``; otherwise it contributes its first
    pattern.  Words with no known pattern contribute nothing.
    """
    pieces = []
    for raw in line.split():
        token = raw.strip('",.?!;:').lower()
        patterns = pr.stresses_for_word(token)
        if '0' in patterns:
            pieces.append('0')
        elif patterns:
            pieces.append(patterns[0])
    return ''.join(pieces)
def get_stresses(word):
    """Return the set of normalised stress patterns for ``word``.

    Each pattern has every ``2`` replaced by ``1``.  A pattern that then
    ends in ``100`` has its final character changed to ``1``.
    """
    # Identify type 2 stresses with type 1
    secondary_to_primary = {ord('2'): u'1'}
    patterns = set()
    for raw in pr.stresses_for_word(word):
        pattern = raw.translate(secondary_to_primary)
        # Trick from (Ghazvininejad et al., 2016)
        if pattern.endswith('100'):
            pattern = pattern[:-1] + '1'
        patterns.add(pattern)
    return patterns
def stress_options_for_word(word):
    """Return every stress pattern of ``word`` plus secondary-stress variants.

    For each pattern longer than one character, two extra variants are
    included: one with every ``2`` replaced by ``0`` and one with every ``2``
    replaced by ``1``.  One-character patterns are kept unchanged.  The
    result is a list built from a set, so it has no duplicates and no
    defined order.
    """
    base_patterns = pronouncing.stresses_for_word(word)
    variants = set(base_patterns)
    for pattern in base_patterns:
        if len(pattern) != 1:
            variants.update((
                pattern.replace('2', '0'),  # secondary treated as unstressed
                pattern.replace('2', '1'),  # secondary treated as stressed
            ))
    return list(variants)
def __init__(self, wordlistfile):
    """Load a word list and precompute rhyme and stress tables.

    Args:
        wordlistfile: path (or file object) read with ``np.loadtxt`` as an
            array of strings.

    Sets:
        commonwords: the loaded words that have at least one pronunciation.
        nallwords: ``len(commonwords)``.
        common_rhyme_indices: for each word, the ``commonwords`` positions
            of its rhymes that also appear in the loaded word list.
        common_stresses: the first stress pattern of each word.
    """
    commonwords = np.loadtxt(wordlistfile, dtype=str)
    pronunciation_known = [
        len(P.phones_for_word(x)) >= 1 for x in commonwords
    ]
    self.commonwords = list(commonwords[pronunciation_known])
    self.nallwords = len(self.commonwords)

    # Build both lookups once; the original rebuilt set(commonwords) for
    # every word and ran list.index() for every rhyme.
    all_words = set(commonwords)
    first_position = {}
    for position, known_word in enumerate(self.commonwords):
        # setdefault keeps the first occurrence, matching list.index().
        first_position.setdefault(known_word, position)

    self.common_rhyme_indices = [
        [first_position[x] for x in set(P.rhymes(w)) & all_words]
        for w in self.commonwords
    ]
    self.common_stresses = [
        P.stresses_for_word(x)[0] for x in self.commonwords
    ]
def compose_noun(word, beats, chord):
    """Append a ``DURATION``/``PITCH`` element pair for a noun to ``tree``.

    Args:
        word: the word; it becomes the text of the ``PITCH`` element.
        beats: beat lengths, written comma-separated to ``BEATS``.
        chord: sequence of note names, cycled through once per beat.

    Notes start at the module-level ``octaveNumber``.  The octave is raised
    by one whenever a note name compares lower than the previous chord entry.
    For a two-note word whose first stress pattern ends in ``1``, the last
    note is replaced by the last chord entry.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Single-argument call form prints the same text on Python 2 and 3.
    print("%s %s %s" % (word, strssptn, len(beats)))

    noteList = []
    tmpOctaveNumber = octaveNumber
    for c in range(len(beats)):
        position = c % len(chord)
        print(chord[position])
        if c > 0 and chord[position] < chord[position - 1]:
            tmpOctaveNumber += 1
        noteList.append("%s%s" % (chord[position], str(tmpOctaveNumber)))

    ## if the word only has two syllables,
    ## make the last syllable the highest note in the chord.
    # The original tested ``strssptn[-1] == 1``: a string compared with an
    # int is never equal, and the non-short-circuit ``&`` raised IndexError
    # for words with no known stresses.  The first pattern is used here, as
    # buildMelodyByWord does when it derives the beats.
    if len(noteList) == 2 and strssptn and strssptn[0][-1:] == '1':
        noteList[-1] = "%s%s" % (chord[-1], str(tmpOctaveNumber))

    duration = etree.SubElement(
        tree, 'DURATION', {'BEATS': ','.join([str(b) for b in beats])})
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join(noteList)})
    pitch.text = word
import pronouncing
import random

# Print 50 random word pairs that share the syllable count and a stress
# pattern with FIRSTWORD and SECONDWORD respectively.
FIRSTWORD = 'alexander'
SECONDWORD = 'hamilton'


def nsyl(word):
    """Return the syllable count of the first listed pronunciation of ``word``.

    Raises IndexError when ``word`` has no pronunciation.
    """
    pronunciation_list = pronouncing.phones_for_word(word)
    return pronouncing.syllable_count(pronunciation_list[0])


firstnum = nsyl(FIRSTWORD)
secondnum = nsyl(SECONDWORD)
firstwords = []
secondwords = []
first_stresses = set(pronouncing.stresses_for_word(FIRSTWORD))
second_stresses = set(pronouncing.stresses_for_word(SECONDWORD))

for word in pronouncing.search('.*'):
    # Skip entries containing an apostrophe.
    if "'" in word:
        continue
    cnt = nsyl(word)
    if cnt in (firstnum, secondnum):
        stresses = set(pronouncing.stresses_for_word(word))
        if cnt == firstnum and stresses & first_stresses:
            firstwords.append(word)
        if cnt == secondnum and stresses & second_stresses:
            secondwords.append(word)

for _ in range(50):
    # One pre-formatted argument prints "a b" on both Python 2 and 3.
    print("%s %s" % (random.choice(firstwords), random.choice(secondwords)))
def get_stresses(word):
    """Return ``pronouncing.stresses_for_word(word)`` unchanged."""
    return pronouncing.stresses_for_word(word)
def test_stresses_for_word(self):
    """"permit" yields the stress patterns "01" and "12", in that order."""
    expected = ["01", "12"]
    self.assertEqual(expected, pronouncing.stresses_for_word("permit"))
def evaluate_prob(prime_str=['f**k'], predict_len=4, temperature=0.8):
    """Sample metered, rhyming lines, resampling until a token fits the meter.

    For each decoder step, tokens are redrawn from the same output
    distribution until one is non-blank, has a known stress pattern, and that
    pattern (passed through ``adjusted_meter``) is a prefix of what remains
    of the target meter ``"1010101010"``.  When a line's meter is used up,
    its last word becomes the rhyme target for the next line; the line after
    that swaps its last word for the most probable vocabulary word that
    rhymes with the target and has the same stress pattern, if one exists,
    then clears the target.

    Args:
        prime_str: list of priming tokens.  It is copied, never modified.
        predict_len: number of ``"\n"`` tokens to produce before stopping.
        temperature: divisor applied to the decoder output before ``exp()``.

    Returns:
        A new list: the priming tokens followed by the generated tokens, with
        a ``"\n"`` token after each completed line.
    """
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    # Copy: the original aliased ``prime_str``, so ``predicted += ...`` grew
    # the shared default list and later calls started with stale tokens.
    predicted = list(prime_str)
    target_meter = "1010101010"
    target_rhyme = ""

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    while predicted.count("\n") < predict_len:
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = vocab[top_i]

        # Redraw until the token is not blank.
        while predicted_char.strip() == "":
            top_i = torch.multinomial(output_dist, 1)[0]
            predicted_char = vocab[top_i]

        if predicted_char.strip() != "":
            # Redraw until the token's meter fits the rest of the line.
            # NOTE(review): this never terminates if no vocabulary entry
            # fits the remaining meter.
            meter = True
            while meter:
                if pronouncing.stresses_for_word(predicted_char):
                    word_meter = adjusted_meter(
                        pronouncing.stresses_for_word(predicted_char)[0])
                    if target_meter.startswith(word_meter):
                        meter = False
                    else:
                        top_i = torch.multinomial(output_dist, 1)[0]
                        predicted_char = vocab[top_i]
                else:
                    top_i = torch.multinomial(output_dist, 1)[0]
                    predicted_char = vocab[top_i]

            # Both conditions below already hold when the loop above exits;
            # the checks are kept from the original.
            if pronouncing.stresses_for_word(predicted_char):
                word_meter = adjusted_meter(
                    pronouncing.stresses_for_word(predicted_char)[0])
                if target_meter.startswith(word_meter):
                    # Add the token and use it as the next input.
                    predicted += [predicted_char]
                    inp = char_tensor([predicted_char])
                    target_meter = target_meter[len(word_meter):]

                    if target_meter == "":
                        final_word = predicted.pop()
                        if target_rhyme != "":
                            meter_candidates = pronouncing.search_stresses(
                                pronouncing.stresses_for_word(final_word)[0])
                            rhyme_candidates = pronouncing.rhymes(target_rhyme)
                            # A set gives O(1) membership tests in the scan.
                            candidates = (set(meter_candidates)
                                          & set(rhyme_candidates)
                                          & set(vocab))
                            if candidates:
                                # Choose the candidate with the largest
                                # weight in the current output distribution.
                                max_prob = 0.0
                                max_index = 0
                                for i in range(len(output_dist)):
                                    if (vocab[i] in candidates
                                            and output_dist[i] > max_prob):
                                        max_prob = output_dist[i]
                                        max_index = i
                                final_word = vocab[max_index]
                        predicted += [final_word, "\n"]
                        inp = char_tensor([final_word])
                        if target_rhyme == "":
                            target_rhyme = final_word
                        else:
                            target_rhyme = ""
                        target_meter = "1010101010"
                    elif target_meter == "0":
                        # Only one unstressed slot left: ask for a stressed
                        # one instead.
                        target_meter = "1"
    return predicted
def stress_map(words):
    """Map each word to the list from ``pronouncing.stresses_for_word``.

    The full list is stored for every word; callers that want a single
    pronunciation take element 0 themselves.
    """
    mapping = {}
    for word in words:
        mapping[word] = pronouncing.stresses_for_word(word)
    return mapping
def test_stresses_for_word_uppercase(self):
    """An upper-case query returns the same patterns as lower-case "permit"."""
    expected = ['01', '12']
    actual = pronouncing.stresses_for_word('PERMIT')
    self.assertEqual(expected, actual)
def test_stresses_for_word(self):
    """'permit' has two stress patterns, returned as ['01', '12']."""
    self.assertEqual(
        ['01', '12'],
        pronouncing.stresses_for_word('permit'),
    )
def matches_stress_pattern(string, pattern):
    """Tell whether any stress pattern of ``string`` matches ``pattern``.

    ``pattern`` is a regular expression applied with ``search`` to each
    stress string of the word.  Returns False when the word has no stress
    patterns.
    """
    candidates = pronouncing.stresses_for_word(string)
    compiled = re.compile(pattern)
    return any(compiled.search(candidate) for candidate in candidates)
def test_stresses_for_word_uppercase(self):
    """Looking up 'PERMIT' in capitals still gives ['01', '12']."""
    self.assertEqual(
        ['01', '12'],
        pronouncing.stresses_for_word('PERMIT'),
    )
def test_stresses_for_word(self):
    """Check the stress patterns reported for the word 'permit'."""
    word = 'permit'
    expected = ['01', '12']
    self.assertEqual(expected, pronouncing.stresses_for_word(word))
def get_stresses(word):
    """Return the set of stress patterns known for ``word``.

    When ``word`` contains a space and splits into exactly two tokens that
    each have exactly one stress pattern, the two patterns are concatenated
    and returned as a one-element set.  In every other case the whole string
    is looked up as-is.
    """
    if ' ' in word:
        parts = [pronouncing.stresses_for_word(sword) for sword in word.split()]
        if len(parts) == 2 and len(parts[0]) == 1 and len(parts[1]) == 1:
            return {parts[0][0] + parts[1][0]}
    return set(pronouncing.stresses_for_word(word))


# For every entry of WORDS, collect the dictionary words that have the same
# syllable count and share at least one stress pattern, then print 50 random
# combinations with one word drawn per entry.
NUMS = [nsyl(w) for w in WORDS]
words = [[] for _ in range(len(WORDS))]
STRESSES = [get_stresses(w) for w in WORDS]
# Single-argument call form prints the same text on Python 2 and 3.
print(STRESSES)

for word in pronouncing.search('.*'):
    # Skip entries containing an apostrophe.
    if "'" in word:
        continue
    cnt = nsyl(word)
    if cnt in NUMS:
        stresses = set(pronouncing.stresses_for_word(word))
        for num, output, outstresses in zip(NUMS, words, STRESSES):
            if num == cnt and stresses & outstresses:
                output.append(word)

print([len(w) for w in words])

for _ in range(50):
    print(' '.join([random.choice(part) for part in words]))