Example #1
import os

def generate_model(author, steps):
    """Given an author name, processes the data/<author>.txt input for steps number
    of iterations into the model input to be used by the lambda_handler
    function.
    """
    predictor = Predictor(128)

    # Filenames.
    author_models_dir = get_dir_for_author(author)
    if not os.path.exists(author_models_dir):
        os.mkdir(author_models_dir)
    model_file = os.path.join(author_models_dir, author + ".model")
    vocab_file = os.path.join(author_models_dir, author + ".vocab")
    commons_file = os.path.join(author_models_dir, author + ".commons")
    raw_text_file = "../data/" + author + ".txt"

    # Read in the 'frequently used words' as common vocab.
    frequent = read_common_vocab("../data/20k_most_common.txt")

    # Clean the content.
    with open(raw_text_file, 'r') as raw:
        raw_words = raw.read().split(' ')
        data, _ = clean_input_data(raw_words, frequent)

    # Write out the words that occur in the clean data to the commons file.
    record_common_vocab(data, commons_file)

    # Train the model. This step takes the longest.
    predictor.train(data, steps)

    # Save the model that we have trained to disk.
    predictor.save(model_file, vocab_file)

    return predictor
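
A minimal usage sketch. Assumptions: the corpus ../data/austen.txt and the word list ../data/20k_most_common.txt exist, and the author name "austen" and step count are purely illustrative:

# Hypothetical invocation: train a model for one author, then sample from it.
predictor = generate_model("austen", steps=10000)

# Predictor.sample(n) returns a list of generated words (it is used the same
# way in lambda_handler below), so join them for display.
print(' '.join(predictor.sample(50)))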
Example #2
import os

def lambda_handler(event, context):
    """Use the saved model for an existing author to generate `length` words,
       interleaved with the user's text input."""
    author = event["author"]
    user_text = event["userText"]
    length = event["length"]

    # Load in the predictor.
    author_models_dir = get_dir_for_author(author)
    model_file = os.path.join(author_models_dir, author + ".model")
    vocab_file = os.path.join(author_models_dir, author + ".vocab")
    predictor = Predictor(128, model=model_file, vocab=vocab_file)

    # Clean the user data and separate out unknown words.
    common_vocab = read_common_vocab(os.path.join(author_models_dir, author + ".commons"))
    data, unique_user_words = clean_input_data(user_text.split(' '), common_vocab)

    # Generate `length` words, then post-process them against the user's
    # unique words before returning.
    generated_sample = predictor.sample(length)
    return clean_generated_data(' '.join(generated_sample), unique_user_words)
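
A sketch of a local test call, assuming the model files from Example #1 are already on disk. The event field values are illustrative, and since the handler never reads the Lambda context object, None stands in for it:

# Hypothetical test event mirroring the fields the handler reads.
event = {
    "author": "austen",                        # expects <author>.model/.vocab/.commons files
    "userText": "It is a truth universally",   # its unique words feed clean_generated_data
    "length": 100,                             # number of words to generate
}
print(lambda_handler(event, context=None))     # context is never referenced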