Exemple #1
0
def main(option):
    """Train and score the model(s) selected by ``option``.

    ``1`` runs the unigram baseline, ``2`` runs the 5-gram model, and ``3``
    runs both. Any other value does nothing. Each selected model is built
    from a fresh ``read_data()`` call and its accuracy is printed as a
    percentage with two decimals.
    """
    if option in (1, 3):
        baseline = unigram.Unigram(read_data())
        print('-------------- Baseline -----------------')
        dev_pct = predict(baseline, filename_dev) * 100
        print(f'Accuracy of Unigram on data/dev = {dev_pct:.2f} %\n')

    if option in (2, 3):
        ngram_lm = five_gram.Five_gram(read_data())
        print('-------- n-gram Language Model ----------')
        # Score dev first, then test, printing each result as soon as it is
        # available.
        dev_pct = predict(ngram_lm, filename_dev) * 100
        print(f'Accuracy of 5-Gram on data/dev = {dev_pct:.2f} %')
        test_pct = predict(ngram_lm, filename_test) * 100
        print(f'Accuracy of 5-Gram on data/test = {test_pct:.2f} %')
Exemple #2
0
import argparse
import unigram
import fivegram
import sevengram_smoothed
import operator

# Module-level model instances, created at import time with no constructor
# arguments: one object per available model class.
unigram_model = unigram.Unigram()
fivegram_model = fivegram.Fivegram()
sevengram_smoothed_model = sevengram_smoothed.Sevengram_Smoothed()

def predict(filename):
    """Predict the text file on the model and print the accuracy.

    Each line of ``filename`` is processed independently: the module-level
    ``model`` is reset with ``model.start()``, then for every character of
    the line (trailing newline removed) the model's prediction is compared
    with the actual character before the character is fed to the model via
    ``model.read``.

    Args:
        filename: Path of the text file to evaluate on.

    Returns:
        None. The result is printed as ``Accuracy = <percent>%``.

    Note:
        ``model`` is looked up as a global at call time; it must be bound
        before this function is called.
    """
    total = 0.
    correct = 0
    # Context manager guarantees the file is closed, even if the model raises.
    with open(filename) as text_file:
        for line in text_file:
            model.start()
            for w in line.rstrip('\n'):
                if model.predict() == w:
                    correct += 1
                model.read(w)
                total += 1
    # A file without any characters has no predictions to score; report 0.0
    # instead of dividing by zero.
    accuracy = correct / total * 100 if total else 0.0
    print("Accuracy = " + str(accuracy) + "%")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--model')
    parser.add_argument('--train', required=True)
    parser.add_argument('--predict', required=True)
    args = parser.parse_args()

    if args.model == 'unigram':
Exemple #3
0
def main():
    """Build the unigram baseline and print its accuracy.

    The model is constructed from ``read_data()`` and scored with
    ``predict``; the score is shown as a percentage with two decimals under
    the "data/dev" label.
    """
    baseline = unigram.Unigram(read_data())
    print('-------------- Baseline -----------------')
    dev_pct = predict(baseline) * 100
    print(f'Accuracy of Unigram on data/dev = {dev_pct:.2f} %')
Exemple #4
0
                print('\a', end='')
            else:
                erase()
                print(f'\x1b[1D', end='', flush=True)
                chars.pop()
                states.pop()
        else:
            print(c, end='', flush=True)
            chars.append(c)
            states.append(lm.read(states[-1], c))
    return ''.join(chars)

if __name__ == "__main__":
    import argparse
    import unigram

    # Command line: a single positional argument, the training file path.
    parser = argparse.ArgumentParser()
    parser.add_argument(dest='train')
    args = parser.parse_args()

    # Each training line becomes a list of its characters (trailing newline
    # removed). The context manager closes the file as soon as it is read.
    with open(args.train) as train_file:
        data = [list(line.rstrip('\n')) for line in train_file]

    ##### Replace this line with an instantiation of your model #####
    lm = unigram.Unigram(data)

    # Prompt repeatedly until end-of-input (EOFError from getline) ends the
    # session.
    while True:
        try:
            line = getline('> ')
        except EOFError:
            break
Exemple #5
0
# Register the positional `train` argument and parse the command line.
# (`parser` is created outside the visible lines.)
parser.add_argument(dest='train')
args = parser.parse_args()

train = []
dev= []

# Load the training file: one list of characters per line, with all trailing
# whitespace stripped from each line.
with open(args.train, "r") as f: 
    train = [list(line.rstrip()) for line in f]

# Load the dev file: one list of characters per line (only the trailing
# newline removed), followed by an '<EOS>' marker.
with open("data/dev", "r") as f: 
    dev = [list(line.rstrip("\n")) + ['<EOS>'] for line in f]

# Build the unigram model from the training data and fetch its start state.
m = unigram.Unigram(train)
q = m.start()

# Running counts of scored positions and correct predictions.
total = 0
correct = 0


# Score the model's predictions against the dev data.
# NOTE(review): in the visible lines `q` is never reassigned and `a` is unused,
# so every prediction is made from the start state -- presumably a state
# update follows beyond this excerpt; confirm.
for line in dev:
    for index, a in enumerate(line):  
        predicted_symbol = m.best(q)
        # The last position has no following symbol, so it is not scored.
        if index != len(line) - 1:
            # The prediction is compared with the symbol that follows the
            # current position.
            gold_symbol = line[index + 1]
            if predicted_symbol == gold_symbol:
                correct += 1
            total+=1