Esempio n. 1
0
def prepare_input(file, type):
    '''
    Prepares the input from a textfile to analyze either words or meter.

    Every character that is neither alphabetic nor whitespace is discarded.
    Whitespace of any kind (spaces, tabs, line breaks) acts as a word
    separator, so the last word of one line is never glued to the first word
    of the next line. The text is handled as one single line of words.

    Parameters
      file, string: a path to a valid textfile
      type, string: "words", or "meter" / "meters" (both spellings accepted)

    Returns
      if type == "words"
        a list of each word from the file
      if type == "meter" or type == "meters"
        a two-element list: the first line of poetrytools.tokenize() applied
        to the cleaned text, and the first entry of poetrytools.scanscion()
        for those tokens

    Raises
      ValueError: if type is not one of the accepted values

    Pre/postconditions
      none additional
    '''
    # Fail with a clear message instead of the UnboundLocalError that an
    # unknown mode used to trigger at the return statement.
    if type not in ("words", "meter", "meters"):
        raise ValueError(
            'type must be "words", "meter" or "meters", got %r' % (type,)
        )

    with open(file) as f:
        file_string = f.read()

    # Keep letters, turn every whitespace character into a plain space and
    # drop everything else; split() then collapses runs of separators.
    words = "".join(
        c if c.isalpha() else " "
        for c in file_string
        if c.isalpha() or c.isspace()
    ).split()

    if type == "words":
        return words

    # Meter analysis works on the cleaned text as one single-spaced line.
    tokens = poetrytools.tokenize(" ".join(words))
    return [tokens[0], poetrytools.scanscion(tokens)[0]]
Esempio n. 2
0
def analyze_syllables(word_lists):
    """
    Scan every word of a nested word list and derive its syllable count.

    :param word_lists: A list of lists of words.
    :return: stresses: The result of poetrytools.scanscion for 'word_lists'
    (one stress notation per word).
    :return: syllables: A nested list with the same layout as 'stresses'
    where every stress notation is replaced by its length.
    """
    stresses = poetrytools.scanscion(word_lists)

    # The syllable count of a word is the length of its stress notation.
    syllables = [[len(pattern) for pattern in line] for line in stresses]

    return stresses, syllables
Esempio n. 3
0
    def get_score(self):
        """
        Score the sonnet on rhyme, syllable count and meter.

        Rhyme: one point for each rhyming pair of line endings in the
        ababcdcdefefgg scheme (lines 0/2, 1/3, 4/6, 5/7, 8/10, 9/11, 12/13),
        so at least 14 lines are required.
        Syllables: every line is penalised by its distance from 10 syllables.
        Meter: one point for every word whose scansion is exactly "01".

        :return: a list [total, rhyme_score, 140 - abs(syllable_score),
                 meter_score] where total is the sum of the three raw scores.
        """
        # The rhyme check only needs the final word of each line.
        line_endings = [line[-1].name for line in self.sonnet]

        # Index pairs that must rhyme for an ababcdcdefefgg scheme.
        rhyme_pairs = [(0, 2), (1, 3), (4, 6), (5, 7), (8, 10), (9, 11),
                       (12, 13)]
        rhyme_score = sum(
            1
            for first, second in rhyme_pairs
            if poetrytools.rhymes(line_endings[first], line_endings[second])
        )

        # Each line loses as many points as it deviates from 10 syllables.
        syllable_score = 0
        for line in self.sonnet:
            line_total = sum(word.get_syllables() for word in line)
            syllable_score -= abs(10 - line_total)

        # Count the words that poetrytools scans as "01".
        meter = poetrytools.scanscion(poetrytools.tokenize(self.get_str()))
        meter_score = sum(
            1 for line in meter for word in line if word == "01"
        )

        total = rhyme_score + syllable_score + meter_score
        return [total, rhyme_score, 140 - abs(syllable_score), meter_score]
Esempio n. 4
0
for key in poems:
    # Each record starts with the poem id; analysis fields are added below.
    poem_dict = {"poem_id": key}
    print(count)
    if count in (1234, 1034):
        # The poems at these positions are blank: advance the counter only.
        count += 1
        continue
    mypoem = poetrytools.tokenize(poems[key])

    # rhyme_scheme yields one label per line, e.g. "X a b X X a X a b c X c"
    # once joined with spaces.
    poem_dict["rhyme_scheme"] = ' '.join(poetrytools.rhyme_scheme(mypoem))

    # scanscion yields one list of stress patterns per line; each line is
    # stored as a space-separated string such as "0 1 1 10 10".
    poem_dict["scanscion"] = [
        ' '.join(item) for item in poetrytools.scanscion(mypoem)
    ]

    # stanza_lengths returns e.g. "12".
    poem_dict["stanza_lengths"] = poetrytools.stanza_lengths(mypoem)

    # Element 3 of guess_metre is a very rough guess such as
    # "iambic trimeter".
    poem_dict["meter_guess"] = poetrytools.guess_metre(mypoem)[3]

    print(poem_dict)
    count += 1
    # load results in database
Esempio n. 5
0
 def get_syllables(self):
     """
     Scan self.x_batches with poetrytools and keep the result.

     The output of poetrytools.scanscion(self.x_batches) is stored in
     self.syllables and also returned, so callers can use the value
     directly. The former trailing loop did nothing and was removed.

     :return: the value stored in self.syllables
     """
     # Imported here, as in the original, so the dependency is only needed
     # when this method is actually called.
     import poetrytools
     self.syllables = poetrytools.scanscion(self.x_batches)
     return self.syllables
# Translate the emitted observation indices into words and show them.
# NOTE(review): the previous comment said "i-1 because the observations are
# 1-indexed", but the lookup uses the index unchanged - confirm which is right.
translated_emission = [conv_words[i] for i in emission]
line = ''.join(word + ' ' for word in translated_emission)
print(line)
print(emission)

# Print the transition matrix rounded to 4 decimals.
A_vis = np.array(hmm.A).round(4)
print(A_vis)

# For each state, report its 10 most probable words together with their
# probabilities, part-of-speech tags and stress patterns.
O = np.array(hmm.O)
for i in range(len(O)):
    state_emissions = O[i, :]
    # Indices sorted by descending probability, limited to the first 10.
    top_emissions = np.argsort(state_emissions)[::-1][:10]
    top_probs = list(state_emissions[top_emissions])
    top_words = [conv_words[j] for j in top_emissions]

    # Keep only the tag of each (word, tag) pair.
    pos = [tag for _, tag in nltk.pos_tag(top_words)]

    # scanscion expects a list of lines, so wrap the words as a single line.
    stresses = poetrytools.scanscion([top_words])[0]

    print()
    print(top_words)
    print(np.around(top_probs, 4))
    print(pos)
    print(stresses)
import poetrytools

# Scan a one-line, one-word "poem".
x = [["spring"]]

syllables = poetrytools.scanscion(x)

#y = "spring"
#print(poetrytools.stress(y))


def this():
    """Print the "min" stress variant of a few sample words."""
    for y in ["spring", "project", "attribute", "insult"]:
        print(poetrytools.stress(y, "min"))


# Stress lookup for a single word in "all" mode.
m = poetrytools.stress("nightingale", "all")
# print() call syntax works on Python 2 and 3; the former `print m`
# statement is a SyntaxError on Python 3.
print(m)

# Concatenate the "min" stress of each word of a line and show the
# resulting pattern and its length.
words = "the nightingales thy coming each where sing:".split(" ")
m = ''.join([poetrytools.stress(x, "min") for x in words])
print(m)
print(len(m))
Esempio n. 8
0
# Select the corpus path, vocabulary size and file suffix: a small fixture
# when TEST is truthy, the large Gutenberg corpus otherwise.
if TEST:
    OUTPUT, VOCAB, SUFFIX = r"../data/test/input.txt", 300, ""
else:
    OUTPUT, VOCAB, SUFFIX = r"../data/large/gutenberg.txt", 40000, "NEW"

def write_out(text, f):
    """Write `text` to the file at path `f`, replacing existing content."""
    with open(f, "w") as sink:
        sink.write(text)


def read_in(ff):
    """Return the whole text content of the file at path `ff`."""
    with open(ff, "r") as source:
        content = source.read()
    return content


# Load the corpus chosen by the TEST switch above.
# NOTE(review): the old comment here said "Prune vocab for most common
# words", but nothing below prunes anything - confirm whether a step is missing.
text = read_in(OUTPUT)

# Disabled write-back of the loaded text:
#write_out(text, out)
poem = poetrytools.tokenize(text) # need to tokenize the poem first
print(poem)
# Scan the tokenized poem and show the result.
x = poetrytools.scanscion(poem)
print(x)

# Ask poetrytools to guess the poem's form, with verbose output enabled.
poetrytools.guess_form(poem, verbose=True)