Esempio n. 1
0
def main():
    """
    Small Finnish demo: tag one sentence, substitute theme words and print.

    The sentence is POS-tagged with tag_fi, theme words for u'koira' are
    fetched from the graph database, and the words tagged as noun or verb
    are handed to replace_all. Whatever add_morph returns is printed.
    """
    graph_db = './graphs/verkko.db'
    sentence = u"Porsas kiiruhti kissan kanssa kotiin."
    replaceable_tags = ['POS=NOUN', 'POS=VERB']

    tagged_tokens = tag_fi.pos_tag(sentence)
    # Only the first element of the theme_based_words result is used here.
    theme_words = generate_poem.theme_based_words(u'koira', graph_db, 7)[0]
    swapped = generate_poem.replace_all(
        'fi', tagged_tokens, theme_words, replaceable_tags, '', '_')
    print(add_morph(tagged_tokens, swapped))
Esempio n. 2
0
def main():
    """
    Demo run on a chunk of the Finnish poetry corpus.

    Takes a chunk of the corpus, POS-tags it, replaces the words tagged as
    noun or verb with theme words for u'koira', and prints the result of
    finnish_specific.add_morph.
    """
    corpus_path = './runoutta/runoutta_aakkosellinen.txt'
    graph_path = './graphs/verkko.db'
    wanted_tags = ['POS=NOUN', 'POS=VERB']

    poem_chunk = get_chunk_of_corpus(corpus_path)
    tagged_chunk = tag_fi.pos_tag(poem_chunk)
    # Only the first element of the theme_based_words result is needed.
    theme_words = theme_based_words(u'koira', graph_path, 7)[0]
    with_substitutes = replace_all(
        'fi', tagged_chunk, theme_words, wanted_tags, '', '_')
    print(finnish_specific.add_morph(tagged_chunk, with_substitutes))
Esempio n. 3
0
def main():
    """
    Print the morphological analysis of a corpus chunk with replaced words.

    Pipeline: get_chunk_of_corpus -> tag_fi.pos_tag -> theme_based_words
    (theme u'koira') -> replace_all (language 'fi', noun and verb tags)
    -> finnish_specific.add_morph.
    """
    source_text = get_chunk_of_corpus('./runoutta/runoutta_aakkosellinen.txt')
    pos_tagged = tag_fi.pos_tag(source_text)

    # Index 0 of the theme_based_words result holds what replace_all needs.
    candidates = theme_based_words(u'koira', './graphs/verkko.db', 7)[0]
    noun_and_verb = ['POS=NOUN', 'POS=VERB']
    rewritten = replace_all('fi', pos_tagged, candidates, noun_and_verb,
                            '', '_')

    result = finnish_specific.add_morph(pos_tagged, rewritten)
    print(result)
Esempio n. 4
0
def generate_poem(language,
                  corpus,
                  db,
                  tag_list,
                  theme='',
                  tag_separator='_',
                  newline='&'):
    """
    Generates a poem by replacing words of a corpus poem with theme words.

    A chunk of the corpus is POS-tagged, the words whose tag is in
    ``tag_list`` are replaced with words found around ``theme`` in the
    graph, and the word forms are generated again. As a side effect the
    poem is written as HTML to 'runo.php', and generate_metadata is called
    with the file name 'runo_metadata.txt'.

    :param language: language id; the code branches on 'en', 'fr' and 'fi'
    :param corpus: poetry corpus
    :param db: graph database
    :param tag_list: list of POS-tags of the words that should be replaced
    :param theme: theme of the poem; a random theme is chosen when it is
                  empty or only whitespace
    :param tag_separator: character that separates a word and a tag in the graph
    :param newline: the character that marks the place of the newline character
    :return: tuple (theme, original poem, new poem, replacing words, message);
             on failure the tuple ('', '', error message, [], '')
    """

    # Minimum percentage of new words in the poem.
    # NOTE(review): this value was spelled ``0 % 25``, which evaluates to 0.
    # A threshold of 25 percent may have been intended -- confirm before
    # changing the value.
    min_replaced = 0
    if language == 'fi':
        min_replaced = 0

    # Function to use to tag English.
    # Options:
    # Stanford tagger (recommended): pos_tag_stanford
    tagger = 'pos_tag_stanford'

    # Function to use to tag French.
    # Options:
    # MElt tagger (recommended): pos_tag_melt
    # Stanford tagger (doesn't tag proper nouns): pos_tag_stanford
    tagger_fr = 'pos_tag_melt'

    # Error messages to show to the user. The first one quotes the theme as
    # the caller gave it, so it is built before the theme is modified.
    error_message = ('ERROR\n'
                     'Not enough words found around the theme \'%s\'. '
                     'Try with another theme.' % theme)
    error_message2 = ('ERROR\n'
                      'Sorry, something went wrong. Try again!')

    error = ('', '', error_message, [], '')
    error2 = ('', '', error_message2, [], '')

    theme = unicode(theme)

    # Outer loop: repeat until a poem with enough replaced words is produced.
    while True:
        # Inner loop: up to five attempts to find theme words and to
        # replace words in a poem.
        for _ in range(5):
            ready = False
            random_theme = False
            if theme == '' or theme.isspace():
                theme = choose_random_theme(db, 1)
                random_theme = True

            # Try to solve the POS of the theme if the user hasn't provided it
            elif tag_separator not in theme:
                theme = theme.lower()
                tagged_theme = None
                if language == 'en':
                    tagged_theme = tag_en.quick_pos_tag(theme,
                                                        tagger=tagger)[0]
                elif language == 'fr':
                    tagged_theme = tag_fr.quick_pos_tag(
                        theme, output='tuple', tagger=tagger_fr)[0]

                # Only English and French have a quick tagger here. For any
                # other language (e.g. 'fi') the theme is looked up without
                # a tag instead of failing on an unbound pos_tag.
                if tagged_theme is not None:
                    (token, pos_tag) = tagged_theme
                    # Correspondences for the English LSA graph
                    tag_dict = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'NNP': 'n'}
                    if tag_separator == '\\' and pos_tag in tag_dict:
                        theme = token + tag_separator + tag_dict[pos_tag]
                    else:
                        theme = token + tag_separator + pos_tag

            sys.stdout.write('Looking for theme words...')
            try:
                (words, goodness) = theme_based_words(theme, db, 20)
            except Exception:
                # A random theme can simply be replaced by another one; a
                # theme given by the user cannot, so report the error.
                if random_theme:
                    theme = ''
                    print('\nNot enough theme words found.')
                    continue
                else:
                    return error
            else:
                print(' done.')

            # Try to find a poem with enough replaceable words.
            # A new chunk is fetched on every attempt so that a retry can
            # actually give a different result.
            # orig_poem is a string of raw text
            for _ in range(5):
                orig_poem = get_chunk_of_corpus(corpus)
                orig_poem = helpers.capitalize_after_dot(orig_poem)
                if language == 'en':
                    tagged = tag_en.pos_tag(orig_poem, tagger=tagger)
                elif language == 'fr':
                    tagged = tag_fr.pos_tag(orig_poem, tagger=tagger_fr)
                elif language == 'fi':
                    tagged = tag_fi.pos_tag(orig_poem)
                tag_counter = sum(1 for t in tagged if t[1] in tag_list)
                # An empty tagging result has no replaceable words; checking
                # the length first also avoids a division by zero.
                if len(tagged) > 0 and (float(tag_counter)
                                        / float(len(tagged))
                                        * 100 > min_replaced):
                    break
                print('Not enough replaceable words.')

            sys.stdout.write('Replacing words...')
            try:
                # NOTE(review): the English tagger name is passed for every
                # language -- confirm that replace_all expects this.
                replaced = replace_all(language, tagged, words, tag_list,
                                       tagger, tag_separator)
            except Exception:
                if random_theme:
                    theme = ''
                    continue
                else:
                    return error
            ready = True
            print(' done.')
            break

        if not ready:
            if random_theme:
                return error2
            else:
                return error

        sys.stdout.write('Analysing the new words...')
        if language == 'en':
            (analysed,
             replacing_words) = english_specific.add_morph(tagged, replaced)
        elif language == 'fr':
            (analysed,
             replacing_words) = french_specific.add_morph(tagged, replaced)
        elif language == 'fi':
            (analysed,
             replacing_words) = finnish_specific.add_morph(tagged, replaced)
        print(' done.')

        number_of_replaced = len(replacing_words)
        number_of_words = len(analysed)
        if number_of_words == 0:
            percent_replaced = 0
        else:
            percent_replaced = (float(number_of_replaced)
                                / float(number_of_words) * 100)
        if percent_replaced > min_replaced:
            message = (str(round(percent_replaced, 0))
                       + ' percent of the words replaced.')
            break
        else:
            print(percent_replaced)
            print('Not enough words replaced.')
            # Start over with a new random theme; a user-given theme is kept.
            if random_theme:
                theme = ''

    sys.stdout.write('Generating word forms...')
    if language == 'en':
        analysed = english_specific.correct_english(analysed)
        new_poem = morphg_en.generate_word_form(analysed)
    elif language == 'fr':
        analysed = french_specific.place_adjectives(analysed)
        analysed = french_specific.number_agreement(analysed)
        analysed = french_specific.gender_agreement(analysed)
        new_poem = morphg_fr.generate_word_form(analysed)
    elif language == 'fi':
        # new_poem has to be bound in every branch because it is used right
        # below; the generated text is assumed to be returned in the same
        # way as by the English and French generators.
        new_poem = morphg_fi.generate_word_form(analysed)
    print(' done.')
    new_poem = new_poem.rstrip()

    # Drop the POS tag from the theme before showing it.
    theme = theme.split(tag_separator)[0]

    # Generate metadata.
    generate_metadata(replaced, goodness, 'runo_metadata.txt', newline)

    # Generate php file.
    # NOTE(review): the theme and the poem are written without HTML
    # escaping -- confirm that both come from trusted input.
    lines = new_poem.split('\n')
    paragraph = ('<p>'
                 + ''.join(line + '<br />\n' for line in lines)
                 + '</p>')
    # The with-statement closes the file even if a write fails.
    with codecs.open('runo.php', 'w', 'utf-8') as f:
        f.write('<h2>' + theme.capitalize() + '</h2>\n')
        f.write(paragraph)

    return (theme, orig_poem, new_poem, replacing_words, message)
Esempio n. 5
0
def generate_poem(language,
                  corpus,
                  db,
                  tag_list,
                  theme='',
                  tag_separator='_',
                  newline='&'):
    """
    Generates a poem by replacing words of a corpus poem with theme words.

    A chunk of the corpus is POS-tagged, the words whose tag is in
    ``tag_list`` are replaced with words found around ``theme`` in the
    graph, and the word forms are generated again. As a side effect the
    poem is written as HTML to 'runo.php', and generate_metadata is called
    with the file name 'runo_metadata.txt'.

    :param language: language id; the code branches on 'en', 'fr' and 'fi'
    :param corpus: poetry corpus
    :param db: graph database
    :param tag_list: list of POS-tags of the words that should be replaced
    :param theme: theme of the poem; a random theme is chosen when it is
                  empty or only whitespace
    :param tag_separator: character that separates a word and a tag in the graph
    :param newline: the character that marks the place of the newline character
    :return: tuple (theme, original poem, new poem, replacing words, message);
             on failure the tuple ('', '', error message, [], '')
    """

    # Minimum percentage of new words in the poem.
    # NOTE(review): this value was spelled ``0 % 25``, which evaluates to 0.
    # A threshold of 25 percent may have been intended -- confirm before
    # changing the value.
    min_replaced = 0
    if language == 'fi':
        min_replaced = 0

    # Function to use to tag English.
    # Options:
    # Stanford tagger (recommended): pos_tag_stanford
    tagger = 'pos_tag_stanford'

    # Function to use to tag French.
    # Options:
    # MElt tagger (recommended): pos_tag_melt
    # Stanford tagger (doesn't tag proper nouns): pos_tag_stanford
    tagger_fr = 'pos_tag_melt'

    # Error messages to show to the user. The first one quotes the theme as
    # the caller gave it, so it is built before the theme is modified.
    error_message = ('ERROR\n'
                     'Not enough words found around the theme \'%s\'. '
                     'Try with another theme.' % theme)
    error_message2 = ('ERROR\n'
                      'Sorry, something went wrong. Try again!')

    error = ('', '', error_message, [], '')
    error2 = ('', '', error_message2, [], '')

    theme = unicode(theme)

    # Outer loop: repeat until a poem with enough replaced words is produced.
    while True:
        # Inner loop: up to five attempts to find theme words and to
        # replace words in a poem.
        for _ in range(5):
            ready = False
            random_theme = False
            if theme == '' or theme.isspace():
                theme = choose_random_theme(db, 1)
                random_theme = True

            # Try to solve the POS of the theme if the user hasn't provided it
            elif tag_separator not in theme:
                theme = theme.lower()
                tagged_theme = None
                if language == 'en':
                    tagged_theme = tag_en.quick_pos_tag(theme,
                                                        tagger=tagger)[0]
                elif language == 'fr':
                    tagged_theme = tag_fr.quick_pos_tag(
                        theme, output='tuple', tagger=tagger_fr)[0]

                # Only English and French have a quick tagger here. For any
                # other language (e.g. 'fi') the theme is looked up without
                # a tag instead of failing on an unbound pos_tag.
                if tagged_theme is not None:
                    (token, pos_tag) = tagged_theme
                    # Correspondences for the English LSA graph
                    tag_dict = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'NNP': 'n'}
                    if tag_separator == '\\' and pos_tag in tag_dict:
                        theme = token + tag_separator + tag_dict[pos_tag]
                    else:
                        theme = token + tag_separator + pos_tag

            sys.stdout.write('Looking for theme words...')
            try:
                (words, goodness) = theme_based_words(theme, db, 20)
            except Exception:
                # A random theme can simply be replaced by another one; a
                # theme given by the user cannot, so report the error.
                if random_theme:
                    theme = ''
                    print('\nNot enough theme words found.')
                    continue
                else:
                    return error
            else:
                print(' done.')

            # Try to find a poem with enough replaceable words.
            # A new chunk is fetched on every attempt so that a retry can
            # actually give a different result.
            # orig_poem is a string of raw text
            for _ in range(5):
                orig_poem = get_chunk_of_corpus(corpus)
                orig_poem = helpers.capitalize_after_dot(orig_poem)
                if language == 'en':
                    tagged = tag_en.pos_tag(orig_poem, tagger=tagger)
                elif language == 'fr':
                    tagged = tag_fr.pos_tag(orig_poem, tagger=tagger_fr)
                elif language == 'fi':
                    tagged = tag_fi.pos_tag(orig_poem)
                tag_counter = sum(1 for t in tagged if t[1] in tag_list)
                # An empty tagging result has no replaceable words; checking
                # the length first also avoids a division by zero.
                if len(tagged) > 0 and (float(tag_counter)
                                        / float(len(tagged))
                                        * 100 > min_replaced):
                    break
                print('Not enough replaceable words.')

            sys.stdout.write('Replacing words...')
            try:
                # NOTE(review): the English tagger name is passed for every
                # language -- confirm that replace_all expects this.
                replaced = replace_all(language, tagged, words,
                                       tag_list, tagger, tag_separator)
            except Exception:
                if random_theme:
                    theme = ''
                    continue
                else:
                    return error
            ready = True
            print(' done.')
            break

        if not ready:
            if random_theme:
                return error2
            else:
                return error

        sys.stdout.write('Analysing the new words...')
        if language == 'en':
            (analysed, replacing_words) = english_specific.add_morph(tagged,
                                                                     replaced)
        elif language == 'fr':
            (analysed, replacing_words) = french_specific.add_morph(tagged,
                                                                    replaced)
        elif language == 'fi':
            (analysed, replacing_words) = finnish_specific.add_morph(tagged,
                                                                     replaced)
        print(' done.')

        number_of_replaced = len(replacing_words)
        number_of_words = len(analysed)
        if number_of_words == 0:
            percent_replaced = 0
        else:
            percent_replaced = (float(number_of_replaced)
                                / float(number_of_words) * 100)
        if percent_replaced > min_replaced:
            message = (str(round(percent_replaced, 0))
                       + ' percent of the words replaced.')
            break
        else:
            print(percent_replaced)
            print('Not enough words replaced.')
            # Start over with a new random theme; a user-given theme is kept.
            if random_theme:
                theme = ''

    sys.stdout.write('Generating word forms...')
    if language == 'en':
        analysed = english_specific.correct_english(analysed)
        new_poem = morphg_en.generate_word_form(analysed)
    elif language == 'fr':
        analysed = french_specific.place_adjectives(analysed)
        analysed = french_specific.number_agreement(analysed)
        analysed = french_specific.gender_agreement(analysed)
        new_poem = morphg_fr.generate_word_form(analysed)
    elif language == 'fi':
        # new_poem has to be bound in every branch because it is used right
        # below; the generated text is assumed to be returned in the same
        # way as by the English and French generators.
        new_poem = morphg_fi.generate_word_form(analysed)
    print(' done.')
    new_poem = new_poem.rstrip()

    # Drop the POS tag from the theme before showing it.
    theme = theme.split(tag_separator)[0]

    # Generate metadata.
    generate_metadata(replaced, goodness, 'runo_metadata.txt', newline)

    # Generate php file.
    # NOTE(review): the theme and the poem are written without HTML
    # escaping -- confirm that both come from trusted input.
    lines = new_poem.split('\n')
    paragraph = ('<p>'
                 + ''.join(line + '<br />\n' for line in lines)
                 + '</p>')
    # The with-statement closes the file even if a write fails.
    with codecs.open('runo.php', 'w', 'utf-8') as f:
        f.write('<h2>' + theme.capitalize() + '</h2>\n')
        f.write(paragraph)

    return (theme, orig_poem, new_poem, replacing_words, message)