Esempio n. 1
0
def get_stresses(word):
	"""Return the set of stress patterns known for ``word``.

	A phrase made of exactly two whitespace-separated words, each of which
	has exactly one stress pattern, yields a one-element set holding the two
	patterns concatenated. In every other case ``word`` is looked up as a
	whole and all of its patterns are returned.
	"""
	if ' ' in word:
		per_word = [pronouncing.stresses_for_word(piece) for piece in word.split()]
		if len(per_word) == 2:
			first, second = per_word
			# Only unambiguous halves can be glued into a single pattern.
			if len(first) == 1 and len(second) == 1:
				return {first[0] + second[0]}

	return set(pronouncing.stresses_for_word(word))
Esempio n. 2
0
def stresses_for_text(text):
    """Return the stress pattern of ``text`` as one concatenated string.

    The text is lower-cased and split on whitespace; the first stress
    pattern of each word is appended in order.

    Returns:
        The concatenated pattern, or ``''`` when ``text`` contains no words
        or when any word has no known stress pattern.
    """
    line_stresses = []
    for word in text.lower().split():
        word_stresses = pronouncing.stresses_for_word(word)
        if not word_stresses:
            # A single unknown word makes the whole line unscannable.
            return ''
        # Reuse the lookup above rather than querying the dictionary twice.
        line_stresses.append(word_stresses[0])
    return ''.join(line_stresses)
Esempio n. 3
0
def evaluate(prime_str=['f**k'], predict_len=4, temperature=0.8):
    """Sample tokens from the decoder, keeping only those that fit a meter.

    Every generated line has to spell out the stress template
    ``"1010101010"``: a sampled token is accepted only when the adjusted
    form of its first stress pattern is a prefix of what is still left of
    the template. When the template is used up, the line is closed with a
    ``"\\n"`` entry and the template is reset.

    Lines alternate in role. The last word of one line becomes the rhyme
    target; the last word of the following line is replaced, when possible,
    by a random vocabulary word that both rhymes with that target and is
    returned by ``pronouncing.search_stresses`` for the sampled word's
    stress pattern. After that the rhyme target is cleared again.

    Relies on the module-level ``decoder``, ``char_tensor``, ``vocab`` and
    ``adjusted_meter`` defined elsewhere.
    NOTE(review): despite the ``char`` naming, tokens appear to be whole
    words (they are looked up in the pronouncing dictionary) -- confirm.

    Args:
        prime_str: list of priming tokens. It is read but never modified.
        predict_len: number of ``"\\n"`` entries (lines) to produce.
        temperature: divisor applied to the decoder output before ``exp``.

    Returns:
        A new list holding the priming tokens followed by the generated
        tokens and ``"\\n"`` line terminators.
    """
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    # Copy: appending to / popping from `prime_str` itself would corrupt the
    # caller's list and, worse, the shared default argument across calls.
    predicted = list(prime_str)
    target_meter = "1010101010"
    target_rhyme = ""

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    while predicted.count("\n") < predict_len:
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = vocab[top_i]

        # Rejected samples still advance the hidden state (the decoder call
        # above); `inp` stays the same for the next attempt.
        if predicted_char.strip() == "":
            continue
        word_stresses = pronouncing.stresses_for_word(predicted_char)
        if not word_stresses:
            continue
        word_meter = adjusted_meter(word_stresses[0])
        if not target_meter.startswith(word_meter):
            continue

        # Accept the token and use it as the next input.
        predicted.append(predicted_char)
        inp = char_tensor([predicted_char])
        target_meter = target_meter[len(word_meter):]

        if target_meter == "":
            # Line complete: optionally swap the last word for a rhyme.
            final_word = predicted.pop()
            if target_rhyme != "":
                meter_candidates = pronouncing.search_stresses(
                    pronouncing.stresses_for_word(final_word)[0])
                rhyme_candidates = pronouncing.rhymes(target_rhyme)
                candidates = list(
                    set(meter_candidates) & set(rhyme_candidates) & set(vocab))
                if candidates:
                    final_word = random.choice(candidates)
            predicted += [final_word, "\n"]
            print(final_word)
            # Alternate: this line either sets the rhyme or consumed it.
            if target_rhyme == "":
                target_rhyme = final_word
            else:
                target_rhyme = ""
            target_meter = "1010101010"
        elif target_meter == "0":
            # Only one unstressed slot left: ask for a stressed one instead.
            target_meter = "1"

    return predicted
Esempio n. 4
0
def japan_test():
    """Ad-hoc exploration run over the Japan text and one song line.

    Extracts keywords for 'texts/Japan.txt', builds a per-POS word-frequency
    table with lower-cased words, prints the stresses of line index 10 of
    'songs/eye-of-the-tiger.txt', collects the keywords (first 100 only)
    whose first stress pattern equals that of 'survivor', and finally prints
    the POS tagging of that same song line in lower case.

    Nothing is returned; the frequency table and the candidate list are
    computed and then discarded.
    """
    keywords = wikiwords.get_keywords_from_file_and_directory(
        'texts/Japan.txt', 'texts')

    tagged_text = cfg.pos_tagged_from_text(
        librarian.text_from_path('texts/Japan.txt'))
    pos_dict = {}
    for tag, counts in cfg.word_frequency_by_pos(tagged_text).items():
        # Normalise the inner keys so lookups can be case-insensitive.
        pos_dict[tag] = {token.lower(): count for token, count in counts.items()}

    song_lines = librarian.lines_from_file('songs/eye-of-the-tiger.txt')
    target_line = song_lines[10]
    print(stresses_for_line(target_line))

    # "survivor" is the word whose stress slot the candidates must fill.
    slot = ''.join(pronouncing.stresses_for_word('survivor')[0])

    candidates = [
        keyword
        for keyword, patterns in stress_map(keywords[:100]).items()
        if patterns and patterns[0] == slot
    ]

    print(cfg.pos_tagged_from_text(target_line.lower()))
Esempio n. 5
0
def swap(original_word, tag, candidate_words, pos_dict=None):
    """
    Return a random candidate with the same stress pattern as the given word.

    The first stress pattern of ``original_word`` is compared with the first
    stress pattern of every word in ``candidate_words``. If ``pos_dict`` is
    given and non-empty, candidates are further restricted to those whose
    lower-cased form is contained in ``pos_dict[tag]``.

    Returns ``original_word`` unchanged when it has no known stress pattern
    or when no candidate survives the filters.

    Raises:
        KeyError: if ``pos_dict`` is given but has no entry for ``tag``.
    """
    stresses = pronouncing.stresses_for_word(original_word)
    if not stresses:
        # Nothing to match against. Previously this path fell through to an
        # unbound `candidates` and raised UnboundLocalError.
        return original_word

    slot = ''.join(stresses[0])
    candidates = [
        k for k, v in stress_map(candidate_words).items()
        if v and v[0] == slot
    ]
    if pos_dict:
        candidates = [c for c in candidates if c.lower() in pos_dict[tag]]

    if candidates:
        return random.choice(candidates)
    return original_word
Esempio n. 6
0
def buildMelodyByWord(tag_tuple, chord):
    """Emit the musical element(s) for one POS-tagged word.

    Beat lengths are derived from the digits of the word's first stress
    pattern: digit ``0`` gives a beat of 1.0 and any other digit ``d`` gives
    ``2 / d``. A word with no known stress pattern gets a single beat of 1.0.

    A token consisting of exactly one non-word character is written as a
    ``REST`` element under the module-level ``tree``. Any other token has its
    POS tag printed and is handed to ``composition_rules[part_of_speech]``.

    Args:
        tag_tuple: ``(word, POS tag)`` pair.
        chord: passed through unchanged to the composition rule.
    """
    # remember, tag_tuple is in the form of ([word],[POS tag])
    word, part_of_speech = tag_tuple

    # one stress digit per syllable of the (lower-cased) word
    strssptn = pr.stresses_for_word(word.lower())

    if len(strssptn) == 0:
        tmpbeats = [0.0]
    else:
        tmpbeats = [float(x) for x in strssptn[0]]

    # 0.0 is mapped explicitly because 2/0.0 would raise ZeroDivisionError
    beats = [1.0 if bt == 0.0 else 2 / bt for bt in tmpbeats]

    # Raw string: "\W" in a normal literal is an invalid escape sequence.
    if re.match(r"^\W$", word) is not None:
        etree.SubElement(tree, 'REST', {'BEATS': ','.join([str(b) for b in beats])})
    else:
        print(part_of_speech)
        composition_rules[part_of_speech](word, beats, chord)
Esempio n. 7
0
def do_default(word, beats, chord):
    """Append a DURATION/PITCH pair for ``word`` using notes from ``getNotes``.

    Prints the word with its stress patterns, adds a ``DURATION`` element
    (comma-joined ``beats``) under the module-level ``tree``, and nests a
    ``PITCH`` element whose ``NOTE`` attribute is the comma-joined result of
    ``getNotes(word, stresses, beats)`` and whose text is the word itself.

    ``chord`` is accepted for signature parity with the other composition
    rules but is not used here.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid as a Python 3 function call.
    print("%s %s" % (word, strssptn))

    duration = etree.SubElement(tree, 'DURATION', {'BEATS': ','.join([str(b) for b in beats])})
    notes = getNotes(word, strssptn, beats)
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join([str(n) for n in notes])})
    pitch.text = word
Esempio n. 8
0
def stresses(word):
    """Return a stress pattern for ``word``, guessing when it is unknown.

    The word is stripped of surrounding whitespace, lower-cased and stripped
    of surrounding punctuation before lookup. A known word yields its first
    stress pattern; an unknown word yields one ``"1"`` per syllable as
    reported by ``count_syllables``.
    """
    normalized = word.strip().lower().strip(string.punctuation)
    known_patterns = pronouncing.stresses_for_word(normalized)
    if known_patterns:
        return known_patterns[0]
    # Fallback: treat every syllable as stressed.
    return "1" * count_syllables(normalized)
Esempio n. 9
0
def compose_verb(word, beats, chord):
    """Append a DURATION/PITCH pair for a verb.

    Every beat is lengthened by a factor of 1.5, and each beat gets a
    randomly chosen tone of ``chord`` in the module-level ``octaveNumber``.
    The word and its stress patterns are printed first. Elements are added
    under the module-level ``tree``.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid as a Python 3 function call.
    print("%s %s" % (word, strssptn))
    new_beats = [x * 1.5 for x in beats]
    # One independent random pick per beat.
    noteList = ["%s%s" % (random.choice(chord), str(octaveNumber)) for _ in beats]

    duration = etree.SubElement(tree, 'DURATION', {'BEATS': ','.join([str(b) for b in new_beats])})
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join(noteList)})
    pitch.text = word
Esempio n. 10
0
def compose_adj(word, beats, chord):
    """Append a DURATION/PITCH pair for an adjective.

    Every beat is tripled in length. The notes are the leading tones of
    ``chord`` (at most one per beat), all shifted one octave up or down
    (chosen at random once per word) from the module-level ``octaveNumber``.
    The word and its stress patterns are printed first. Elements are added
    under the module-level ``tree``.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid as a Python 3 function call.
    print("%s %s" % (word, strssptn))
    new_beats = [x * 3 for x in beats]
    octaveChange = octaveNumber + random.choice([-1, 1])
    # NOTE(review): a chord shorter than the beat list yields fewer notes
    # than beats -- confirm that is intended.
    noteList = ["%s%s" % (c, str(octaveChange)) for c in chord[:len(new_beats)]]

    duration = etree.SubElement(tree, 'DURATION', {'BEATS': ','.join([str(b) for b in new_beats])})
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join(noteList)})
    pitch.text = word
Esempio n. 11
0
def find_stress(line):
    """Return the concatenated stress pattern of the words in ``line``.

    Each whitespace-separated token is stripped of surrounding ``",.?!;:``
    characters and lower-cased. A word contributes ``'0'`` if any of its
    stress patterns is exactly ``'0'``, otherwise its first stress pattern.
    Words with no known pattern contribute nothing.
    """
    pieces = []
    for token in line.split():
        patterns = pr.stresses_for_word(token.strip('",.?!;:').lower())
        if '0' in patterns:
            # The word can be fully unstressed; prefer that reading.
            pieces.append('0')
        elif patterns:
            pieces.append(patterns[0])
    return ''.join(pieces)
Esempio n. 12
0
    def get_stresses(word):
        """Return the distinct, normalised stress patterns of ``word``.

        Secondary stress ('2') is folded into primary stress ('1'). A pattern
        of three or more syllables that ends in '100' has its final syllable
        promoted to stressed (trick from Ghazvininejad et al., 2016).
        """
        # Identify type 2 stresses with type 1
        fold_secondary = {ord('2'): u'1'}
        normalised = set()

        for raw_pattern in pr.stresses_for_word(word):
            pattern = raw_pattern.translate(fold_secondary)
            if len(pattern) > 2 and pattern.endswith('100'):
                pattern = pattern[:-1] + '1'
            normalised.add(pattern)

        return normalised
Esempio n. 13
0
def stress_options_for_word(word):
    """Return every stress pattern ``word`` may be scanned with.

    Starts from the patterns reported by ``pronouncing`` and, for each
    pattern that is not exactly one syllable long, also adds the variants in
    which secondary stress ('2') is read as unstressed ('0') and as
    stressed ('1'). Duplicates are removed; the order of the returned list
    is unspecified.
    """
    base_patterns = pronouncing.stresses_for_word(word)
    variants = set(base_patterns)

    for pattern in base_patterns:
        if len(pattern) != 1:
            variants.update((
                pattern.replace('2', '0'),  # secondary treated as unstressed
                pattern.replace('2', '1'),  # secondary treated as stressed
            ))

    return list(variants)
Esempio n. 14
0
    def __init__(self, wordlistfile):
        """Load a word list and precompute rhyme and stress tables.

        Args:
            wordlistfile: path (or file object) accepted by ``np.loadtxt``
                that holds the words to load.

        Sets:
            commonwords: the loaded words that have at least one
                pronunciation, in file order.
            nallwords: ``len(commonwords)``.
            common_rhyme_indices: for each entry of ``commonwords``, the
                indices (into ``commonwords``) of its rhymes that appear in
                the loaded word list; order within each list is unspecified.
            common_stresses: the first stress pattern of each entry of
                ``commonwords``.
        """
        # ndmin=1 keeps a one-word file iterable; without it loadtxt returns
        # a 0-d array for a single value.
        commonwords = np.loadtxt(wordlistfile, dtype=str, ndmin=1)

        pronunciation_known = [
            len(P.phones_for_word(x)) >= 1 for x in commonwords
        ]
        self.commonwords = list(commonwords[pronunciation_known])
        self.nallwords = len(self.commonwords)

        # Build both lookup structures once instead of per word / per rhyme.
        all_words = set(commonwords)
        index_of = {}
        for position, known_word in enumerate(self.commonwords):
            # setdefault keeps the first occurrence, matching list.index().
            index_of.setdefault(known_word, position)

        self.common_rhyme_indices = [
            [index_of[rhyme] for rhyme in set(P.rhymes(x)) & all_words]
            for x in self.commonwords
        ]
        self.common_stresses = [
            P.stresses_for_word(x)[0] for x in self.commonwords
        ]
Esempio n. 15
0
def compose_noun(word, beats, chord):
    """Append a DURATION/PITCH pair for a noun.

    One note is produced per beat by cycling through ``chord``. The octave
    starts at the module-level ``octaveNumber`` and is raised by one each
    time the current chord tone compares lower than the previous one. For a
    two-note word whose first stress pattern ends on a primary stress, the
    last note is replaced by the final tone of ``chord``.

    The word, its stress patterns, the beat count and every chord tone used
    are printed. Elements are added under the module-level ``tree``.
    """
    strssptn = pr.stresses_for_word(word.lower())
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid as a Python 3 function call.
    print("%s %s %s" % (word, strssptn, len(beats)))
    noteList = []
    tmpOctaveNumber = octaveNumber
    for c in range(len(beats)):
        tone = chord[c % len(chord)]
        print(tone)
        # Index (c % len(chord)) - 1 wraps to the last tone when the cycle restarts.
        if c > 0 and tone < chord[(c % len(chord)) - 1]:
            tmpOctaveNumber += 1

        noteList.append("%s%s" % (tone, str(tmpOctaveNumber)))

    ## if the word only has two syllables,
    ## make the last syllable the highest note in the chord.
    # The old test `strssptn[-1] == 1` compared a whole stress string with an
    # int (never true) and raised IndexError for words with no pronunciation.
    # NOTE(review): "last syllable of the first pronunciation carries primary
    # stress" is the presumed intent -- confirm.
    if len(noteList) == 2 and strssptn and strssptn[0].endswith('1'):
        noteList[-1] = "%s%s" % (chord[-1], str(tmpOctaveNumber))

    duration = etree.SubElement(tree, 'DURATION', {'BEATS': ','.join([str(b) for b in beats])})
    pitch = etree.SubElement(duration, 'PITCH', {'NOTE': ','.join(noteList)})
    pitch.text = word
Esempio n. 16
0
import pronouncing, random

# Template word for the first generated word: replacements must have the
# same syllable count and share at least one stress pattern with it.
FIRSTWORD = 'alexander'
# Template word for the second generated word (same matching rules).
SECONDWORD = 'hamilton'


def nsyl(word):
    """Return the syllable count of the first listed pronunciation of ``word``.

    Raises IndexError when ``word`` has no pronunciation at all.
    """
    first_pronunciation = pronouncing.phones_for_word(word)[0]
    return pronouncing.syllable_count(first_pronunciation)


# Syllable counts and stress patterns that replacement words must match.
firstnum = nsyl(FIRSTWORD)
secondnum = nsyl(SECONDWORD)
firstwords = []
secondwords = []
first_stresses = set(pronouncing.stresses_for_word(FIRSTWORD))
second_stresses = set(pronouncing.stresses_for_word(SECONDWORD))

# Scan every word returned by the catch-all search and bucket those that have
# the same syllable count as a template word and share a stress pattern with it.
for word in pronouncing.search('.*'):
    if "'" in word:
        # Skip entries containing an apostrophe.
        continue
    cnt = nsyl(word)
    if cnt in (firstnum, secondnum):
        stresses = set(pronouncing.stresses_for_word(word))
        if cnt == firstnum and stresses & first_stresses:
            firstwords.append(word)
        if cnt == secondnum and stresses & second_stresses:
            secondwords.append(word)

# Print 50 random pairings. The parenthesised single-argument form gives the
# same output under the Python 2 print statement and is valid on Python 3.
for i in range(50):
    print("%s %s" % (random.choice(firstwords), random.choice(secondwords)))
def get_stresses(word):
    """Return the stress patterns ``pronouncing`` lists for ``word``, unchanged."""
    return pronouncing.stresses_for_word(word)
Esempio n. 18
0
 def test_stresses_for_word(self):
     # "permit" has two pronunciations, hence two stress strings.
     expected = ["01", "12"]
     self.assertEqual(expected, pronouncing.stresses_for_word("permit"))
Esempio n. 19
0
def evaluate_prob(prime_str=['f**k'], predict_len=4, temperature=0.8):
    """Generate metered, rhyming lines by rejection-sampling the decoder.

    Every generated line has to spell out the stress template
    ``"1010101010"``. For each decoder step, tokens are drawn repeatedly
    from the same output distribution until one is found that is not blank,
    has a known stress pattern, and whose adjusted first stress pattern is a
    prefix of what is still left of the template. When the template is used
    up, the line is closed with a ``"\\n"`` entry and the template is reset.

    Lines alternate in role. The last word of one line becomes the rhyme
    target; the last word of the following line is replaced, when possible,
    by the vocabulary word with the highest weight in the current output
    distribution among those that both rhyme with the target and are
    returned by ``pronouncing.search_stresses`` for the sampled word's
    stress pattern. After that the rhyme target is cleared again.

    Relies on the module-level ``decoder``, ``char_tensor``, ``vocab`` and
    ``adjusted_meter`` defined elsewhere.
    NOTE(review): the inner sampling loop does not terminate if no token in
    the vocabulary fits the remaining template -- confirm this cannot occur.

    Args:
        prime_str: list of priming tokens. It is read but never modified.
        predict_len: number of ``"\\n"`` entries (lines) to produce.
        temperature: divisor applied to the decoder output before ``exp``.

    Returns:
        A new list holding the priming tokens followed by the generated
        tokens and ``"\\n"`` line terminators.
    """
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    # Copy: appending to / popping from `prime_str` itself would corrupt the
    # caller's list and, worse, the shared default argument across calls.
    predicted = list(prime_str)
    target_meter = "1010101010"
    target_rhyme = ""

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    while predicted.count("\n") < predict_len:
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()

        # Redraw from the same distribution until a token fits the meter.
        while True:
            top_i = torch.multinomial(output_dist, 1)[0]
            predicted_char = vocab[top_i]
            if predicted_char.strip() == "":
                continue
            word_stresses = pronouncing.stresses_for_word(predicted_char)
            if not word_stresses:
                continue
            word_meter = adjusted_meter(word_stresses[0])
            if target_meter.startswith(word_meter):
                break

        # Accept the token and use it as the next input.
        predicted.append(predicted_char)
        inp = char_tensor([predicted_char])
        target_meter = target_meter[len(word_meter):]

        if target_meter == "":
            # Line complete: optionally swap the last word for a rhyme.
            final_word = predicted.pop()
            if target_rhyme != "":
                meter_candidates = pronouncing.search_stresses(
                    pronouncing.stresses_for_word(final_word)[0])
                rhyme_candidates = pronouncing.rhymes(target_rhyme)
                candidates = (
                    set(meter_candidates) & set(rhyme_candidates) & set(vocab))
                if candidates:
                    # Pick the candidate the network currently rates highest.
                    max_prob = 0.0
                    max_index = 0
                    for i in range(len(output_dist)):
                        if vocab[i] in candidates and output_dist[i] > max_prob:
                            max_prob = output_dist[i]
                            max_index = i
                    final_word = vocab[max_index]
            predicted += [final_word, "\n"]
            inp = char_tensor([final_word])
            # Alternate: this line either sets the rhyme or consumed it.
            if target_rhyme == "":
                target_rhyme = final_word
            else:
                target_rhyme = ""
            target_meter = "1010101010"
        elif target_meter == "0":
            # Only one unstressed slot left: ask for a stressed one instead.
            target_meter = "1"

    return predicted
Esempio n. 20
0
def stress_map(words):
    """
    Return a dictionary mapping each word to its list of stress patterns.

    The value is whatever ``pronouncing.stresses_for_word`` returns for the
    word; callers that only want the primary pronunciation take element 0.
    """
    mapping = {}
    for word in words:
        mapping[word] = pronouncing.stresses_for_word(word)
    return mapping
Esempio n. 21
0
 def test_stresses_for_word_uppercase(self):
     # Lookup must be case-insensitive: 'PERMIT' behaves like 'permit'.
     expected = ['01', '12']
     self.assertEqual(expected, pronouncing.stresses_for_word('PERMIT'))
Esempio n. 22
0
 def test_stresses_for_word(self):
     # One stress string per pronunciation of 'permit'.
     found = pronouncing.stresses_for_word('permit')
     expected = ['01', '12']
     self.assertEqual(expected, found)
Esempio n. 23
0
def matches_stress_pattern(string, pattern):
    """Return True if any stress pattern of ``string`` matches ``pattern``.

    ``pattern`` is a regular expression applied with ``search`` semantics
    (it may match anywhere in a stress string). A word with no known stress
    pattern yields False.
    """
    candidates = pronouncing.stresses_for_word(string)
    search = re.compile(pattern).search
    return any(search(candidate) for candidate in candidates)
Esempio n. 24
0
 def test_stresses_for_word_uppercase(self):
     # An upper-case query must give the same stresses as the lower-case word.
     found = pronouncing.stresses_for_word('PERMIT')
     expected = ['01', '12']
     self.assertEqual(expected, found)
 def test_stresses_for_word(self):
     # 'permit' has two pronunciations, so two stress strings are expected.
     self.assertEqual(['01', '12'], pronouncing.stresses_for_word('permit'))
Esempio n. 26
0
def get_stresses(word):
	"""Return the set of stress patterns for a word or a two-word phrase.

	When ``word`` contains a space and splits into exactly two words that
	each have a single stress pattern, the result is a one-element set with
	the two patterns joined. Otherwise ``word`` is looked up as a whole.
	"""
	if ' ' in word:
		lookups = [pronouncing.stresses_for_word(token) for token in word.split()]
		# Exactly two words, each with exactly one pattern.
		if [len(found) for found in lookups] == [1, 1]:
			return {lookups[0][0] + lookups[1][0]}

	return set(pronouncing.stresses_for_word(word))

# Per output position: required syllable count, bucket of matching words,
# and the acceptable stress patterns of the template word.
NUMS=[nsyl(w) for w in WORDS]
words=[[] for _ in range(len(WORDS))]

STRESSES=[get_stresses(w) for w in WORDS]
# Parenthesised single-argument prints give the same output under the
# Python 2 print statement and are valid function calls on Python 3.
print(STRESSES)

# Scan every word returned by the catch-all search and file it under each
# position whose syllable count it matches and whose stress patterns it shares.
for word in pronouncing.search('.*'):
	if "'" in word:
		# Skip entries containing an apostrophe.
		continue
	cnt = nsyl(word)
	if cnt in NUMS:
		stresses = set(pronouncing.stresses_for_word(word))
		for num,output,outstresses in zip(NUMS,words,STRESSES):
			if num==cnt and stresses & outstresses:
				output.append(word)
print([len(w) for w in words])
# Emit 50 random phrases, one word drawn from each position's bucket.
for i in range(50):
	out=[random.choice(part) for part in words]
	print(' '.join(out))