Ejemplo n.º 1
0
def get_chunk_of_corpus(corpus):
    """
    Returns a randomly selected piece of a poem from a text file.

    Reading starts at a random line. The rest of the verse that line
    belongs to is skipped, together with the whitespace-only lines that
    follow it, and lines are then collected from the next verse until the
    verse ends or the chunk holds six lines. If the last collected line
    does not end with sentence-final punctuation, more lines of the same
    verse are added until one does or the verse ends. This is repeated
    with the following verses until at least three lines were collected.

    Finally the last line is made to end with a full stop: a trailing
    letter gets a '.' appended and a trailing ',', ':' or ';' is replaced
    by '.'.

    :param corpus: path of a UTF-8 encoded text file
    :return: the collected lines (stripped) joined with newlines. If the
             end of the file is reached first, the result holds fewer
             than three lines and may be the empty string.
    """
    sent_end = ('.', '!', '?', ':', ';')
    lines = []

    # The context manager guarantees the file is closed, also on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line.
        # NOTE(review): helpers.file_length presumably returns the number
        # of lines in the file -- confirm. The upper bound is clamped to 1
        # so that very short files do not make random.randint raise.
        upper = max(1, helpers.file_length(corpus) - 10)
        for _ in range(random.randint(1, upper)):
            f.readline()

        while len(lines) < 3:
            test = f.readline()
            # Go to the beginning of the next verse: skip the rest of the
            # current verse, then the blank lines separating the verses.
            while test != '' and not test.isspace():
                test = f.readline()
            while test.isspace():
                test = f.readline()
            if test == '':
                # readline() returns '' only at end of file; there is no
                # further verse to read.
                break

            # Collect lines of this verse, at most six lines in total.
            while test != '' and not test.isspace() and len(lines) < 6:
                lines.append(test.strip())
                test = f.readline()

            # Do not stop in the middle of a sentence if the verse goes on.
            while (test != '' and not test.isspace()
                   and not lines[-1].endswith(sent_end)):
                lines.append(test.strip())
                test = f.readline()

    if not lines:
        return ''

    # Insert a dot to the end
    punctuation = [',', ':', ';']
    last = lines[-1]
    if last[-1].isalpha():
        last = last + '.'
    elif last[-1] in punctuation:
        last = last[:-1] + '.'
    lines[-1] = last

    return '\n'.join(lines)
Ejemplo n.º 2
0
def get_chunk_of_corpus(corpus):
    """
    Returns a randomly selected piece of a poem from a text file.

    Reading starts at a random line. The rest of the verse that line
    belongs to is skipped, together with the whitespace-only lines that
    follow it, and lines are then collected from the next verse until the
    verse ends or the chunk holds six lines. If the last collected line
    does not end with sentence-final punctuation, more lines of the same
    verse are added until one does or the verse ends. This is repeated
    with the following verses until at least three lines were collected.

    Finally the last line is made to end with a full stop: a trailing
    letter gets a '.' appended and a trailing ',', ':' or ';' is replaced
    by '.'.

    :param corpus: path of a UTF-8 encoded text file
    :return: the collected lines (stripped) joined with newlines. If the
             end of the file is reached first, the result holds fewer
             than three lines and may be the empty string.
    """
    sent_end = ('.', '!', '?', ':', ';')
    lines = []

    # The context manager guarantees the file is closed, also on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line.
        # NOTE(review): helpers.file_length presumably returns the number
        # of lines in the file -- confirm. The upper bound is clamped to 1
        # so that very short files do not make random.randint raise.
        upper = max(1, helpers.file_length(corpus) - 10)
        for _ in range(random.randint(1, upper)):
            f.readline()

        while len(lines) < 3:
            test = f.readline()
            # Go to the beginning of the next verse: skip the rest of the
            # current verse, then the blank lines separating the verses.
            while test != '' and not test.isspace():
                test = f.readline()
            while test.isspace():
                test = f.readline()
            if test == '':
                # readline() returns '' only at end of file; there is no
                # further verse to read.
                break

            # Collect lines of this verse, at most six lines in total.
            while test != '' and not test.isspace() and len(lines) < 6:
                lines.append(test.strip())
                test = f.readline()

            # Do not stop in the middle of a sentence if the verse goes on.
            while (test != '' and not test.isspace()
                   and not lines[-1].endswith(sent_end)):
                lines.append(test.strip())
                test = f.readline()

    if not lines:
        return ''

    # Insert a dot to the end
    punctuation = [',', ':', ';']
    last = lines[-1]
    if last[-1].isalpha():
        last = last + '.'
    elif last[-1] in punctuation:
        last = last[:-1] + '.'
    lines[-1] = last

    return '\n'.join(lines)
Ejemplo n.º 3
0
def get_verse_of_corpus(corpus, num_words, num_lines):
    """
    Reads some text from a file and formats it.

    Reading starts at a random line of the file. The whitespace-separated
    words found from there on are regrouped, in their original order, into
    ``num_lines`` lines of ``num_words`` words each; the line breaks of the
    file are ignored.

    If the end of the file is reached before enough words were read, the
    line being built is left short and the remaining lines are empty, so
    the result still consists of ``num_lines`` lines.

    >>> verse = get_verse_of_corpus('../apparatus/poetry/english_poems.txt', 4, 4)
    >>> len(verse.split()) == 16
    True

    :param corpus: path of a UTF-8 encoded text file
    :param num_words: number of words per line
    :param num_lines: number of lines
    :return: random text with the given number of lines and given number
             of words per line; words are joined by single spaces and
             every line ends with a newline
    """
    lines = []

    # The context manager guarantees the file is closed, also on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line.
        # NOTE(review): helpers.file_length presumably returns the number
        # of lines in the file -- confirm. The upper bound is clamped to 1
        # so that very short files do not make random.randint raise.
        upper = max(1, helpers.file_length(corpus) - 10)
        for _ in range(random.randint(1, upper)):
            f.readline()

        words = f.readline().split()
        pos = 0            # index of the next unused word in ``words``
        exhausted = False  # set once readline() has signalled end of file
        while len(lines) < num_lines:
            line = []
            while len(line) < num_words and not exhausted:
                if pos == len(words):
                    # All words of the current file line are used up.
                    raw = f.readline()
                    if raw == '':
                        # End of file: stop here instead of re-reading the
                        # words of the last file line again.
                        exhausted = True
                        break
                    words = raw.split()
                    pos = 0
                # Take as many words as still fit into the output line.
                taken = words[pos:pos + num_words - len(line)]
                line.extend(taken)
                pos += len(taken)
            lines.append(line)

    return ''.join(' '.join(line) + '\n' for line in lines)
Ejemplo n.º 4
0
def get_verse_of_corpus(corpus, num_words, num_lines):
    """
    Reads some text from a file and formats it.

    Reading starts at a random line of the file. The whitespace-separated
    words found from there on are regrouped, in their original order, into
    ``num_lines`` lines of ``num_words`` words each; the line breaks of the
    file are ignored.

    If the end of the file is reached before enough words were read, the
    line being built is left short and the remaining lines are empty, so
    the result still consists of ``num_lines`` lines.

    >>> verse = get_verse_of_corpus('../apparatus/poetry/english_poems.txt', 4, 4)
    >>> len(verse.split()) == 16
    True

    :param corpus: path of a UTF-8 encoded text file
    :param num_words: number of words per line
    :param num_lines: number of lines
    :return: random text with the given number of lines and given number
             of words per line; words are joined by single spaces and
             every line ends with a newline
    """
    lines = []

    # The context manager guarantees the file is closed, also on errors.
    with codecs.open(corpus, 'r', 'utf-8') as f:
        # Go to the randomly selected line.
        # NOTE(review): helpers.file_length presumably returns the number
        # of lines in the file -- confirm. The upper bound is clamped to 1
        # so that very short files do not make random.randint raise.
        upper = max(1, helpers.file_length(corpus) - 10)
        for _ in range(random.randint(1, upper)):
            f.readline()

        words = f.readline().split()
        pos = 0            # index of the next unused word in ``words``
        exhausted = False  # set once readline() has signalled end of file
        while len(lines) < num_lines:
            line = []
            while len(line) < num_words and not exhausted:
                if pos == len(words):
                    # All words of the current file line are used up.
                    raw = f.readline()
                    if raw == '':
                        # End of file: stop here instead of re-reading the
                        # words of the last file line again.
                        exhausted = True
                        break
                    words = raw.split()
                    pos = 0
                # Take as many words as still fit into the output line.
                taken = words[pos:pos + num_words - len(line)]
                line.extend(taken)
                pos += len(taken)
            lines.append(line)

    return ''.join(' '.join(line) + '\n' for line in lines)