def markov(): """Forming sentence with markov chain""" clean_text_list = clean_file('corpus.txt') markov_word = markov_chain(clean_text_list) # higher_order_markov_chain = nth_order_markov_model(2, clean_text_list) sentence = generate_sentence(10, markov_word) return sentence
def tests():
    """Smoke-test every histogram representation on a sample story.

    Each representation gets the same three probes: unique-word count and
    the frequencies of "Killer" and "to".
    """
    sources = ['rumpelstiltskin.txt', 'tom_thumb.txt']
    words = cleanup.clean_file(sources[0])

    # (histogram builder, matching frequency-lookup function) pairs.
    cases = [
        (tuples_histogram, frequency_tuples_and_lists),
        (lists_histogram, frequency_tuples_and_lists),
        (dictionary_histogram, frequency_dictionary),
    ]
    for build, frequency in cases:
        hist = build(words)
        print(unique_words_lists_tuples_dictionary(hist))
        print(frequency("Killer", hist))
        print(frequency("to", hist))

    # The counts-based histogram has its own unique-word probe.
    counts = counts_histogram(words)
    print(unique_words_counts(counts))
def tests():
    """Print histograms for CLI arguments (or built-in samples when none
    are given), then print a random walk over a sample corpus."""
    import sys
    cli_words = sys.argv[1:]  # everything after the script name
    if cli_words:
        # Histogram over whatever the user passed on the command line.
        print_histogram(cli_words)
    else:
        # Built-in demos: letters of a word, a classic book title,
        # and a long repetitive sentence.
        samples = [
            list('abracadabra'),
            'one fish two fish red fish blue fish'.split(),
            ('how much wood would a wood chuck chuck'
             ' if a wood chuck could chuck wood').split(),
        ]
        for words in samples:
            print_histogram(words)

    corpus = cleanup.clean_file('ghosts_on_coruscant.txt')
    print(Narkovogram(4, corpus).random_walk(10))
def home():
    """Render the home page: a random Markov-generated quote attributed
    to a random actor, with a matching image slug."""
    # Build the model from the cleaned show transcript.
    words = cleanup.clean_file('SiliconValley.txt')
    # 3rd-order model keeps the generated text locally coherent.
    model = markov.make_higher_order_markov_model(3, words)
    # Cap the quote at tweet length (140 characters).
    quote = markov.generate_random_sentence_n(140, model)

    actor = word_frequency.random_actor()
    # Image filenames use underscores instead of spaces and no apostrophes.
    actor_slug = actor.replace(' ', '_').replace('\'', '')

    print('random actor = ', actor)
    print('random actor cleaned ', actor_slug)
    print('random sentence ', quote)

    # Render website <3
    return render_template('layout.html',
                           sentence=quote,
                           actor=actor,
                           actor_image=actor_slug)
sentence_count += 1 current_window = generate_random_start(markov_model) sentence = [current_window[0]] elif sentence_count == 0 and new_tweet_len >= length: # forget the sentence and generate a new one :P current_window = generate_random_start(markov_model) sentence = [current_window[0]] elif sentence_count > 0 and new_tweet_len < length: # More than one sentence. and length is still less max # Get another new sentence tweet += sentence_string sentence_string = ' '.join(sentence) sentence_count += 1 current_window = generate_random_start(markov_model) sentence = [current_window[0]] else: # Return this good good tweet return tweet if __name__ == '__main__': clean_text_list = clean_file('corpus.txt') # print(clean_text_list) # print(markov_chain(clean_text_list)) # markov_chain = markov_chain(clean_text_list) # # higher_order_markov_chain = nth_order_markov_model(2, clean_text_list) # print(markov_chain) # sentence = generate_sentence(10, markov_chain) # # sentence = generate_sentence_with_higher_order(10, higher_order_markov_chain) # print(sentence)
def main():
    """Print a 10-step random walk over the first sample text."""
    sources = ['rumpelstiltskin.txt', 'tom_thumb.txt']
    print(random_walk(cleanup.clean_file(sources[0]), 10))
def tests():
    """Exercise sentence generation on the first sample text.

    The generated sentence is discarded; this only checks the call runs.
    """
    sources = ['rumpelstiltskin.txt', 'tom_thumb.txt']
    words = cleanup.clean_file(sources[0])
    generate_sentence(10, words)
ten_thousand_words = list_of_words(10000) hundred_hgram = Dictogram(hundred_words) ten_thousand_hgram = Dictogram(ten_thousand_words) hundred_search = hundred_words[-1] ten_thousand_search = ten_thousand_words[-1] stmt = "ten_thousand_hgram.count('{}')".format(ten_thousand_search) setup = "from __main__ import ten_thousand_hgram" timer = timeit.Timer(stmt, setup=setup) iterations = 10000 result = timer.timeit(number=iterations) print("count time for 100-word histogram: " + str(result)) # '*********** CREATING DATA STRUCTURES ***********' words_list = cleanup.clean_file('SiliconValley_Actors.txt') print 'Start creating Dictogram' dictogram = Dictogram(words_list) print 'Time to create Dictogram: ', 'seconds' stmt_dictogram = "dictogram.return_weighted_random_word()" setup_dictogram = "from __main__ import dictogram" timer_dictogram = timeit.Timer(stmt_dictogram, setup=setup_dictogram) result_dictogram = timer_dictogram.timeit(number=iterations) print("count time for finding " + str(iterations) + " random weighted words " + str(result_dictogram)) listogram = Listogram(words_list) print 'Time to create Listogram: ', 'seconds'
from flask import Flask, request, render_template import re import random import os import sample import cleanup import narkovogram import sentence app = Flask(__name__) contents = cleanup.clean_file('ghosts_on_coruscant.txt') last_order = 5 # def get_words(): # f = open("./text/ghosts_on_coruscant.txt", "r") # contents = f.read() # f.close() # return contents @app.route('/', methods=['GET']) def generate_sentence(): narkie = narkovogram.Narkovogram(5, contents) number = request.args.get('num') order = request.args.get('order') if number is None: number = 10 else: number = int(number)