예제 #1
0
def run(train,nmax,reps,out):
	"""Train two smoothed n-gram models from one file and generate sentences.

	The training file is read twice: once with ``ngram.set_fractions(True)``
	(the arbitrary-precision path) and once with ``ngram.set_fractions(False)``
	(the log path). Each pass runs the same pipeline:
	``filters.shakespeare`` -> ``filters.unk`` -> ``ngram.ngram`` ->
	``ngram.good_turing`` -> ``ngram.probabilities``.

	Args:
		train: Path of the training file; opened here and also forwarded to
			``sentence_generation``.
		nmax: Passed as the first argument of ``ngram.ngram`` and forwarded
			to ``sentence_generation``.
		reps: Forwarded unchanged to ``sentence_generation``.
		out: Forwarded unchanged to ``sentence_generation``.

	Note:
		``ngram.set_fractions(False)`` is the last mode set, so it is still in
		effect while ``sentence_generation`` runs.
	"""
	# Each pass gets its own handle, and both stay open until sentence
	# generation has finished: nothing visible here guarantees the filter /
	# n-gram pipeline has fully consumed its input before then.
	# The with-blocks close both handles even if a step raises.
	with open(train) as fh_ap:
		with open(train) as fh_log:
			#Get probabilities with arbitrary precision
			ngram.set_fractions(True)
			probs_ap=ngram.probabilities(ngram.good_turing(ngram.ngram(nmax,filters.unk(filters.shakespeare(fh_ap)))))

			#Get probabilities with logs
			ngram.set_fractions(False)
			probs_log=ngram.probabilities(ngram.good_turing(ngram.ngram(nmax,filters.unk(filters.shakespeare(fh_log)))))

			#Make sentences
			sentence_generation(train,out,nmax,reps,probs_ap,probs_log)
예제 #2
0
파일: ngram.py 프로젝트: astory/cs4740_1
def _train_probabilities(n, words, smooth):
	"""Build an n-gram model from words and return its probabilities.

	Runs ``ngram(n, words)``, applies ``good_turing`` to the result when
	``smooth`` is true, and returns ``probabilities`` of the final model.
	"""
	model = ngram(n, words)
	if smooth:
		model = good_turing(model)
	return probabilities(model)


def main():
	"""Command-line entry point: measure perplexity and/or make a sentence.

	Options:
		-n/--n-gram N        n-gram order (default 3).
		-t/--train FILE      training text (default 'Shakespeare/Train.txt').
		-p/--perplexity FILE print the perplexity of FILE under a model
		                     trained on the unk-filtered training text.
		-l/--logs            clear the module-level ``use_fractions`` flag
		                     (it is set when this option is absent).
		-s/--smooth          apply ``good_turing`` to the model.
		-m/--make-sentence   print one sentence generated from a model
		                     trained on the training text without unk
		                     filtering.

	When neither -p nor -m is given, the help text is printed and the
	process exits with status 0.
	"""
	global use_fractions

	parser = ap.ArgumentParser(description='Play with some ngrams')
	parser.add_argument('-n', '--n-gram', metavar='N', type=int,
		dest='n', action='store', default=3,
		help='n-gram to compute')
	# ap.FileType replaces the Python-2-only ``file`` builtin; a file that
	# cannot be opened becomes a normal argparse usage error, and '-' is
	# accepted as standard input.
	parser.add_argument('-t', '--train', metavar='FILE', type=ap.FileType('r'),
		dest='training', action='store', default='Shakespeare/Train.txt',
		help='Train ngrams from this file')
	parser.add_argument('-p', '--perplexity', metavar='FILE',
		type=ap.FileType('r'), dest='perplexity', action='store',
		default=None, help='measure perplexity against this file')
	parser.add_argument('-l', '--logs', dest='use_logs', action='store_true',
		help='compute using logs, default is arbitrary-precision')
	parser.add_argument('-s', '--smooth', dest='smooth', action='store_true',
		help='smooth using Good-Turing smoothing')
	parser.add_argument('-m', '--make-sentence', dest='make_sentence',
		action='store_true', help='produce a sentence')
	args = parser.parse_args()

	try:
		if not args.make_sentence and args.perplexity is None:
			parser.print_help()
			# Raises SystemExit with status 0 without relying on the
			# site-provided exit() helper.
			parser.exit()

		use_fractions = not args.use_logs

		print("main use_fractions: %s" % use_fractions)

		words = filters.shakespeare(args.training)

		# Stays None until the plain (not unk-filtered) model is trained.
		probs = None

		if args.perplexity is not None:
			unked_probs = _train_probabilities(
				args.n, filters.unk(words), args.smooth)
			print("this might take a while...")
			# The evaluation text goes through filters.shakespeare only; an
			# earlier revision also applied filters.unk to it.
			perplex_data = filters.shakespeare(args.perplexity)
			print("Perplexity: %s" % perplexity(unked_probs, perplex_data))
		else:
			probs = _train_probabilities(args.n, words, args.smooth)

		if args.make_sentence:
			if probs is None:
				# -p and -m together: only the unk-filtered model exists so
				# far (this combination used to fail with NameError). The
				# first pass may have consumed the training handle, so
				# rewind it and read the text again. A non-seekable input
				# such as piped stdin cannot be rewound and will raise here.
				args.training.seek(0)
				probs = _train_probabilities(
					args.n, filters.shakespeare(args.training), args.smooth)
			print(" ".join(make_sentence(probs)))
	finally:
		# Release the input files opened by argparse on every exit path.
		args.training.close()
		if args.perplexity is not None:
			args.perplexity.close()