def testReadWordsFromFile():
    """Check what werthman_assgn1.readWordsFromFile returns for the test files.

    Every entry read from 'testwordlist.txt' and 'testhashtaglist.txt' must
    be one of the expected entries listed below, and asking for a limit of 2
    must yield a word list of exactly two entries.
    """
    expectedWords = ('moon', 'man', 'over', 'the', 'help', 'me', 't', 'fi')
    expectedHashtags = ('manoverthemoon', 'helpmeoverthere', 'findmefood', 'whaaaaat')

    #presumably writes the two .txt fixtures read below; the helper is
    #defined elsewhere - TODO confirm
    createWordAndHashtagFiles()
    loadedHashtags = werthman_assgn1.readWordsFromFile('testhashtaglist.txt', False, 20)
    loadedWords = werthman_assgn1.readWordsFromFile('testwordlist.txt', True, 20)

    #nothing unexpected may show up in either list (words first, then hashtags)
    checks = ((loadedWords, expectedWords), (loadedHashtags, expectedHashtags))
    for loaded, expected in checks:
        for entry in loaded:
            assertion(entry in expected, "Set contains {0}.".format(entry))

    #the third argument caps how many words are returned
    cappedWords = werthman_assgn1.readWordsFromFile('testwordlist.txt', True, 2)
    assertion(len(cappedWords) == 2, "Word list should be size 2.")
def testMinEditDistanceAlgo():
    """Check minEditDist and the word error rate (WER) on the test hashtags.

    Each hashtag read from 'testhashtaglist.txt' is segmented with
    werthman_assgn1.maxMatch and paired, in order, with the corresponding
    line of 'realtesthashtags.txt'. For the three known reference
    segmentations the word-level minimum edit distance and the WER
    (distance divided by the number of reference words) are asserted, and
    finally the WER averaged over all reference lines is asserted.

    Both input files must already exist; this function does not create
    them.
    """
    #retrieve the wordlist and hashtag list from the file system
    wordlist = werthman_assgn1.readWordsFromFile("testwordlist.txt", True, 75000)
    hashtags = werthman_assgn1.readWordsFromFile("testhashtaglist.txt", False, 0)

    #use the maxmatch algo to segment every hashtag
    maxmatchHashtags = [werthman_assgn1.maxMatch(hashtag, wordlist, "")
                        for hashtag in hashtags]

    #read in the list of what the hashtags should really look like
    with open("realtesthashtags.txt", "r") as f:
        correctHashtags = [line.strip() for line in f]

    #reference segmentation -> (expected distance, expected WER,
    #                           distance message, WER message)
    expectations = {
        #maxmatchHashtag is the same as the correctHashtag so no changes need to be made
        "man over the moon": (
            0, 0,
            "Man over the moon should have a min edit distance of 0",
            "Man over the moon should have a WER of 0"),
        #maxmatchHashtag:fi ndmefood requires two substitutions fi -> find and ndmefood -> me and an insertion of food
        "find me food": (
            7, 7.0/3,
            "Find me food should have a min edit distance of 7",
            "Find me food should have a should have a WER of 7/3"),
        #maxmatchHashtag:help me over the re requires a substitution the->there and a deletion of re
        "help me over there": (
            3, 3.0/4,
            "help me over there should have a min edit distance of 3",
            "help me over there should have a WER of 3/4"),
    }

    #compare each maxmatchHashtag to each correctHashtag word by word
    totalWER = 0.0
    for maxmatchHashtag, correctHashtag in zip(maxmatchHashtags, correctHashtags):
        if correctHashtag not in expectations:
            #no expectation recorded for this line; it adds nothing to the total
            continue
        expectedDist, expectedWER, distMessage, werMessage = expectations[correctHashtag]

        #convert each string to a list of the words in the string
        correctHashtagAsList = correctHashtag.split()
        maxmatchHashtagAsList = maxmatchHashtag.split()

        #compute the distance once and reuse it for both checks and the total
        dist = werthman_assgn1.minEditDist(correctHashtagAsList, maxmatchHashtagAsList)
        #float() keeps this a true division even where "/" on two ints truncates
        wer = float(dist)/len(correctHashtagAsList)

        assertion(dist == expectedDist, distMessage)
        assertion(wer == expectedWER, werMessage)
        totalWER += wer

    #average the WER across all of the hashtags
    assertion(totalWER/len(correctHashtags) == ((7.0/3 + 3.0/4)/3.0), "Average WER across test set should be (7/3 + 3/4)/3, about 1.03")
def finalTestMaxMatchAlgo():
    """Compare maxMatch output on the training hashtags with the reference file.

    Hashtags from 'hashtags-train.txt' are segmented with
    werthman_assgn1.maxMatch using the words from 'bigwordlist.txt', then
    checked pairwise, in order, against the stripped lines of
    'hashtags-train-maxmatch.txt'. Pairing uses zip, so if the two sequences
    differ in length the surplus entries are not compared.
    """
    #load the dictionary words and the raw hashtags from the file system
    dictionaryWords = werthman_assgn1.readWordsFromFile('bigwordlist.txt', True, 75000)
    rawHashtags = werthman_assgn1.readWordsFromFile('hashtags-train.txt', False, 0)

    #segment every hashtag with the maxmatch algo
    segmentedHashtags = [werthman_assgn1.maxMatch(rawHashtag, dictionaryWords, "")
                         for rawHashtag in rawHashtags]

    #expected maxmatch output from the professor's provided file,
    #with whitespace characters like newlines stripped off
    with open('hashtags-train-maxmatch.txt', 'r') as referenceFile:
        referenceHashtags = [referenceLine.strip() for referenceLine in referenceFile]

    #each segmented hashtag must equal the professor's result on the same line
    messageTemplate = "My maxmatch algo hashtag: {0} should be the same as the professor's maxmatch algo: {1}."
    for produced, wanted in zip(segmentedHashtags, referenceHashtags):
        assertion(produced == wanted, messageTemplate.format(produced, wanted))