def _train_probabilities(train, nmax, use_fractions):
    """Train a Good-Turing-smoothed n-gram model from the file at *train*.

    train         -- path of the training text
    nmax          -- n-gram order handed to ngram.ngram
    use_fractions -- value passed to ngram.set_fractions before training

    Returns whatever ngram.probabilities produces for the smoothed counts.
    """
    # The context manager closes the training file even if training raises.
    with open(train) as fh:
        ngram.set_fractions(use_fractions)
        tokens = filters.unk(filters.shakespeare(fh))
        # NOTE(review): assumes the pipeline has fully consumed fh by the
        # time ngram.probabilities returns -- confirm it is not lazy.
        return ngram.probabilities(
            ngram.good_turing(ngram.ngram(nmax, tokens)))


def run(train, nmax, reps, out):
    """Train the model in both numeric modes and generate sentences.

    The training file is read twice: once with ngram.set_fractions(True)
    (arbitrary-precision probabilities) and once with
    ngram.set_fractions(False) (log probabilities).  Both results are then
    handed to sentence_generation together with the other arguments.

    train -- path of the training text
    nmax  -- n-gram order
    reps  -- forwarded unchanged to sentence_generation
    out   -- forwarded unchanged to sentence_generation
    """
    # Get probabilities with arbitrary precision
    probs_ap = _train_probabilities(train, nmax, True)
    # Get probabilities with logs (leaves the ngram module in log mode)
    probs_log = _train_probabilities(train, nmax, False)
    # Make sentences
    sentence_generation(train, out, nmax, reps, probs_ap, probs_log)
def _model_probabilities(n, words, smooth):
    """Build n-gram counts from *words* and convert them to probabilities.

    n      -- n-gram order passed to ngram()
    words  -- token iterable passed to ngram()
    smooth -- when true, the counts go through good_turing() first
    """
    ng = ngram(n, words)
    if smooth:
        ng = good_turing(ng)
    return probabilities(ng)


def main():
    """Command-line entry point.

    Parses the options, trains an n-gram model on the training file and then,
    depending on the flags, prints the perplexity of another file (-p) and/or
    a generated sentence (-m).  When neither -p nor -m is given the help text
    is printed and the process exits.

    Side effect: sets the module-level ``use_fractions`` flag (False when
    -l/--logs is given, True otherwise).
    """
    global use_fractions

    parser = ap.ArgumentParser(description='Play with some ngrams')
    parser.add_argument('-n', '--n-gram', metavar='N', type=int, dest='n',
                        action='store', default=3, help='n-gram to compute')
    # argparse also runs string defaults through ``type``, so the default
    # training path is opened just like a path given on the command line.
    parser.add_argument('-t', '--train', metavar='FILE', type=file,
                        dest='training', action='store',
                        default='Shakespeare/Train.txt',
                        help='Train ngrams from this file')
    parser.add_argument('-p', '--perplexity', metavar='FILE', type=file,
                        dest='perplexity', action='store', default=None,
                        help='measure perplexity against this file')
    parser.add_argument('-l', '--logs', dest='use_logs', action='store_true',
                        help='compute using logs, default is arbitrary-precision')
    parser.add_argument('-s', '--smooth', dest='smooth', action='store_true',
                        help='smooth using Good-Turing smoothing')
    parser.add_argument('-m', '--make-sentence', dest='make_sentence',
                        action='store_true', help='produce a sentence')
    args = parser.parse_args()

    want_perplexity = args.perplexity is not None
    if not args.make_sentence and not want_perplexity:
        parser.print_help()
        # parser.exit() ends with status 0 without relying on the
        # site-provided exit() builtin.
        parser.exit()

    use_fractions = not args.use_logs
    print("main use_fractions: %s" % use_fractions)

    words = filters.shakespeare(args.training)

    if want_perplexity:
        # Perplexity is measured against a model trained on UNK-filtered text.
        unked_probs = _model_probabilities(args.n, filters.unk(words),
                                           args.smooth)
        print("this might take a while...")
        # NOTE(review): the evaluation text is not passed through
        # filters.unk (a commented-out earlier variant did so) -- confirm
        # perplexity() copes with tokens the model has never seen.
        perplex_data = filters.shakespeare(args.perplexity)
        print("Perplexity: %s" % perplexity(unked_probs, perplex_data))

    if args.make_sentence:
        if want_perplexity:
            # The perplexity model above already read the training file.
            # Previously `probs` was never built in this case and -p with -m
            # ended in a NameError; rewind and tokenize again instead.
            # NOTE(review): assumes the training file is seekable.
            args.training.seek(0)
            words = filters.shakespeare(args.training)
        probs = _model_probabilities(args.n, words, args.smooth)
        print(" ".join(make_sentence(probs)))