def main(dir_name, file_name):
    """Tokenize a corpus, then build, write and time its 1-, 2- and 3-gram models.

    Args:
        dir_name: Path of a corpus directory. It is the token source when
            ``file_name`` is falsy. Whenever it is truthy, its last path
            component (the "genre") is prefixed to every model name as
            ``<genre>_<model>``.
        file_name: Path of a single corpus file. When truthy it takes
            precedence over ``dir_name`` as the token source.

    Returns:
        None. Progress and whole-second timings are printed to stdout, and
        each model is handed to its ``write_*_to_file`` function.
    """
    # NOTE: print(...) with a single parenthesised argument behaves exactly
    # like the Python 2 print statement, so the output is unchanged.

    # Strip trailing separators before taking the basename; otherwise a path
    # such as "corpus/children/" yields "" and models get named "_unigram".
    genre = ntpath.basename(dir_name.rstrip("/\\")) if dir_name else None

    start = datetime.datetime.now()
    if file_name:
        tokens = file_to_tokens(file_name)
    else:
        # One of these two will always be populated, based on how this
        # function is called.
        tokens = dir_to_tokens(dir_name)
    tokens = filter_tokens(tokens)
    end = datetime.datetime.now()
    print("\nProcessed %d tokens in %s seconds."
          % (len(tokens), str((end - start).seconds)))

    # (label used in the progress message, base model name, builder, writer)
    stages = (
        ("Unigram", "unigram", build_unigram_model, write_unigram_to_file),
        ("Bigram", "bigram", build_bigram_model, write_bigram_to_file),
        ("Trigram", "trigram", build_trigram_model, write_trigram_to_file),
    )
    for label, base_name, build_model, write_model in stages:
        start = datetime.datetime.now()
        model = build_model(tokens)
        model_name = base_name
        if dir_name:
            model_name = genre + "_" + base_name
        write_model(model, model_name)
        end = datetime.datetime.now()
        print("\nBuilt and wrote %s Model in %s seconds."
              % (label, str((end - start).seconds)))
def _elapsed_seconds(start, end):
    """Return the time between two datetimes as fractional seconds (float)."""
    delta = end - start
    # Include whole days so that very long runs are not silently truncated;
    # for runs shorter than a day this equals seconds + microseconds / 1e6.
    return float(delta.days * 86400 + delta.seconds) + delta.microseconds / 1000000.0


def main():
    """Smooth every saved genre n-gram model and write the smoothed result.

    For each genre (children, crime, history) and each n-gram order
    (1 = unigram, 2 = bigram, 3 = trigram) this:

    1. imports ``saved_models.<genre>_<order name>`` and reads its ``model``
       attribute,
    2. passes it to ``createSmoothedModel(order, model)`` (Good-Turing
       smoothing, per the progress messages),
    3. writes the result under the name ``<genre>_<order name>_smoothed``.

    Returns:
        None. Progress and per-step timings are printed to stdout.
    """
    # Function-scope import keeps this block self-contained.
    import importlib

    # NOTE: print(...) with a single parenthesised argument behaves exactly
    # like the Python 2 print statement, so the output is unchanged.
    genres = ("children", "crime", "history")
    orders = ((1, "unigram"), (2, "bigram"), (3, "trigram"))

    for genre in genres:
        for order, order_name in orders:
            model_name = "%s_%s" % (genre, order_name)
            print("\nCreating '%s' %s Model with Good-Turing Smoothing..."
                  % (genre.capitalize(), order_name.capitalize()))

            start = datetime.datetime.now()
            # Equivalent to: from saved_models.<model_name> import model
            model = importlib.import_module("saved_models." + model_name).model
            end = datetime.datetime.now()
            print("\tLoaded model in %s seconds."
                  % str(_elapsed_seconds(start, end)))

            start = datetime.datetime.now()
            smoothed_model = createSmoothedModel(order, model)
            end = datetime.datetime.now()
            print("\tSmoothed model in %s seconds."
                  % str(_elapsed_seconds(start, end)))

            start = datetime.datetime.now()
            # NOTE(review): the original calls write_unigram_to_file for every
            # order, and that is preserved here. Confirm that this writer is
            # the intended one for smoothed bigram/trigram models.
            write_unigram_to_file(smoothed_model, model_name + "_smoothed")
            end = datetime.datetime.now()
            print("\tSaved model in %s seconds."
                  % str(_elapsed_seconds(start, end)))