def main(option):
    """Run the evaluation(s) selected by ``option`` and print the accuracies.

    Args:
        option: Selector compared against 1, 2 and 3. A value of 1 or 3
            builds a ``unigram.Unigram`` model and reports its accuracy on
            ``filename_dev``; 2 or 3 builds a ``five_gram.Five_gram`` model
            and reports its accuracy on ``filename_dev`` and then
            ``filename_test``. Any other value prints nothing.

    Returns:
        None. Results are written to standard output only.
    """
    if option in (1, 3):
        baseline = unigram.Unigram(read_data())
        print('-------------- Baseline -----------------')
        # The banner is printed before predict() runs, as in the original.
        baseline_dev_pct = predict(baseline, filename_dev) * 100
        print(f'Accuracy of Unigram on data/dev = {baseline_dev_pct:.2f} %\n')

    if option in (2, 3):
        # The data is read again here; each model gets its own read_data().
        ngram = five_gram.Five_gram(read_data())
        print('-------- n-gram Language Model ----------')
        ngram_dev_pct = predict(ngram, filename_dev) * 100
        print(f'Accuracy of 5-Gram on data/dev = {ngram_dev_pct:.2f} %')
        # The test file is evaluated only after the dev result is printed.
        ngram_test_pct = predict(ngram, filename_test) * 100
        print(f'Accuracy of 5-Gram on data/test = {ngram_test_pct:.2f} %')
import argparse import unigram import fivegram import sevengram_smoothed import operator unigram_model = unigram.Unigram() fivegram_model = fivegram.Fivegram() sevengram_smoothed_model = sevengram_smoothed.Sevengram_Smoothed() def predict(filename): """Predict the text file on the model.""" total = 0. correct = 0 for line in open(filename): model.start() for w in line.rstrip('\n'): if model.predict() == w: correct += 1 model.read(w) total += 1 print("Accuracy = " + str(correct/total*100) + "%") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--model') parser.add_argument('--train', required=True) parser.add_argument('--predict', required=True) args = parser.parse_args() if args.model == 'unigram':
def main():
    """Build the unigram baseline and print the accuracy ``predict`` reports.

    Constructs ``unigram.Unigram`` from the result of ``read_data()``, prints
    a banner, then prints the value returned by ``predict`` for that model,
    scaled to a percentage with two decimal places.

    Returns:
        None. Results are written to standard output only.
    """
    baseline = unigram.Unigram(read_data())
    print('-------------- Baseline -----------------')
    # The banner is printed before predict() runs, as in the original.
    accuracy_pct = predict(baseline) * 100
    print(f'Accuracy of Unigram on data/dev = {accuracy_pct:.2f} %')
print('\a', end='') else: erase() print(f'\x1b[1D', end='', flush=True) chars.pop() states.pop() else: print(c, end='', flush=True) chars.append(c) states.append(lm.read(states[-1], c)) return ''.join(chars) if __name__ == "__main__": import argparse import unigram parser = argparse.ArgumentParser() parser.add_argument(dest='train') args = parser.parse_args() data = [list(line.rstrip('\n')) for line in open(args.train)] ##### Replace this line with an instantiation of your model ##### lm = unigram.Unigram(data) while True: try: line = getline('> ') except EOFError: break
parser.add_argument(dest='train') args = parser.parse_args() train = [] dev= [] #Import Train with open(args.train, "r") as f: train = [list(line.rstrip()) for line in f] #Import Dev with open("data/dev", "r") as f: dev = [list(line.rstrip("\n")) + ['<EOS>'] for line in f] #Train Model m = unigram.Unigram(train) q = m.start() total = 0 correct = 0 #Read Dev Data into the model. for line in dev: for index, a in enumerate(line): predicted_symbol = m.best(q) if index != len(line) - 1: gold_symbol = line[index + 1] if predicted_symbol == gold_symbol: correct += 1 total+=1