import os


def generate_model(author, steps):
    """Train a predictor for the given author from data/<author>.txt,
    running for `steps` iterations, and save the model files that
    lambda_handler loads.
    """
    predictor = Predictor(128)

    # Filenames. get_dir_for_author is expected to return a path with a
    # trailing separator.
    author_models_dir = get_dir_for_author(author)
    if not os.path.exists(author_models_dir):
        os.mkdir(author_models_dir)
    model_file = author_models_dir + author + ".model"
    vocab_file = author_models_dir + author + ".vocab"
    commons_file = author_models_dir + author + ".commons"
    raw_text_file = "../data/" + author + ".txt"

    # Read in the 'frequently used words' as common vocab.
    frequent = read_common_vocab("../data/20k_most_common.txt")

    # Clean the content.
    with open(raw_text_file, 'r') as raw:
        raw_words = raw.read().split(' ')
    data, _ = clean_input_data(raw_words, frequent)

    # Write out the words that occur in the clean data to the commons file.
    record_common_vocab(data, commons_file)

    # Train the model. This step takes the longest.
    predictor.train(data, steps)

    # Save the trained model to disk.
    predictor.save(model_file, vocab_file)
    return predictor
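

# A minimal training sketch, assuming a corpus exists at ../data/austen.txt
# relative to this module; the author name and step count below are
# illustrative values, not taken from the original code.
def train_example():
    predictor = generate_model("austen", steps=10000)
    # The returned predictor can sample immediately, as lambda_handler does.
    return predictor.sample(50)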


def lambda_handler(event, context):
    """Use a trained model for an existing author to generate `length`
    words, interleaved with the user's text input.
    """
    author = event["author"]
    user_text = event["userText"]
    length = event["length"]

    # Load in the predictor.
    author_models_dir = get_dir_for_author(author)
    model_file = author_models_dir + author + ".model"
    vocab_file = author_models_dir + author + ".vocab"
    predictor = Predictor(128, model=model_file, vocab=vocab_file)

    # Clean the user data and separate out the unknown words.
    # clean_input_data expects a list of words, so split the raw string.
    common_vocab = read_common_vocab(author_models_dir + author + ".commons")
    data, unique_user_words = clean_input_data(user_text.split(' '), common_vocab)

    generated_sample = predictor.sample(length)
    return clean_generated_data(' '.join(generated_sample), unique_user_words)
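

# A minimal local-invocation sketch, assuming the author's model files have
# already been generated; the event values below are illustrative, and None
# stands in for the unused Lambda context object.
if __name__ == "__main__":
    sample_event = {
        "author": "austen",
        "userText": "it is a truth universally acknowledged",
        "length": 50,
    }
    print(lambda_handler(sample_event, None))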