Ejemplo n.º 1
0
def _seconds_since(start):
    """Return the wall-clock time elapsed since *start* as float seconds.

    Uses ``timedelta.total_seconds()`` so that neither the sub-second part
    nor the days component of the interval is dropped.
    """
    return (datetime.datetime.now() - start).total_seconds()


def main(dir_name, file_name):
    """Tokenize the input, then build and write unigram/bigram/trigram models.

    Args:
        dir_name: Directory to read tokens from. When truthy, its last path
            component (via ``ntpath.basename``) is used as a genre prefix
            for every model name, e.g. ``"<genre>_unigram"``.
        file_name: Single file to read tokens from. When truthy it is used
            as the token source instead of ``dir_name``.

    One of the two arguments is expected to be populated by the caller.
    Progress and timing for each stage are printed to stdout.
    """
    genre = None
    if dir_name:
        # Strip trailing separators first: basename("books/crime/") would
        # otherwise be "" and produce model names such as "_unigram".
        genre = ntpath.basename(dir_name.rstrip("/\\"))

    start = datetime.datetime.now()
    if file_name:
        tokens = file_to_tokens(file_name)
    else:  # One of these two will always be populated by the caller.
        tokens = dir_to_tokens(dir_name)
    tokens = filter_tokens(tokens)
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement and is also valid Python 3.
    print("\nProcessed %d tokens in %s seconds." % (len(tokens), _seconds_since(start)))

    # (label used in messages, model builder, model writer) for each order.
    stages = (
        ("Unigram", build_unigram_model, write_unigram_to_file),
        ("Bigram", build_bigram_model, write_bigram_to_file),
        ("Trigram", build_trigram_model, write_trigram_to_file),
    )
    for label, build_model, write_model in stages:
        start = datetime.datetime.now()
        model_name = label.lower()
        if dir_name:
            model_name = genre + "_" + model_name
        write_model(build_model(tokens), model_name)
        # The reported time covers both building and writing the model.
        print("\nBuilt and wrote %s Model in %s seconds." % (label, _seconds_since(start)))
Ejemplo n.º 2
0
def _elapsed_seconds(start):
    """Return the wall-clock time elapsed since *start* as float seconds."""
    # total_seconds() also accounts for the days component, which a manual
    # seconds + microseconds sum silently ignores.
    return (datetime.datetime.now() - start).total_seconds()


def main():
    """Smooth and save every saved genre/order model.

    For each genre (children, crime, history) and each n-gram order
    (unigram, bigram, trigram), in that nesting order, this:

    1. imports ``saved_models.<genre>_<order>`` and reads its ``model``
       attribute,
    2. passes it to ``createSmoothedModel(n, model)`` with n = 1, 2 or 3,
    3. writes the result under the name ``"<genre>_<order>_smoothed"``.

    The time taken by each step is printed to stdout.
    """
    import importlib

    genres = ("children", "crime", "history")
    orders = ((1, "unigram"), (2, "bigram"), (3, "trigram"))

    for genre in genres:
        for n, order_name in orders:
            model_name = "%s_%s" % (genre, order_name)
            # Single-argument parenthesised print is valid in Python 2 and 3.
            print("\nCreating '%s' %s Model with Good-Turing Smoothing..."
                  % (genre.capitalize(), order_name.capitalize()))

            start = datetime.datetime.now()
            model = importlib.import_module("saved_models." + model_name).model
            print("\tLoaded model in %s seconds." % str(_elapsed_seconds(start)))

            start = datetime.datetime.now()
            smoothed_model = createSmoothedModel(n, model)
            print("\tSmoothed model in %s seconds." % str(_elapsed_seconds(start)))

            start = datetime.datetime.now()
            # The unigram writer is used for every order, exactly as before.
            # NOTE(review): confirm write_unigram_to_file is the intended
            # writer for bigram/trigram smoothed models as well.
            write_unigram_to_file(smoothed_model, model_name + "_smoothed")
            print("\tSaved model in %s seconds." % str(_elapsed_seconds(start)))