def test_add(self):
     """Check size and membership after adding names to an empty HashSet."""
     hs = HashSet()
     assert hs.size == 0

     # Each step: the name to add, the size expected afterwards, and the
     # answer contains() should give for every probed name.
     steps = (
         ("Shane", 1, (("Shane", True), ("Alan", False), ("Bob", False))),
         ("Alan", 2, (("Shane", True), ("Alan", True), ("Bob", False))),
     )
     for new_name, expected_size, expectations in steps:
         hs.add(new_name)
         assert hs.size == expected_size
         for probe, expected in expectations:
             assert hs.contains(probe) == expected
def _stripToWord(token, notChars):
    """Lower-case token and reduce it to its first run of word characters.

    Matches of notChars at the start of the token are dropped one at a
    time; once the token starts with a word character it is cut at the next
    match.  For example "(Hello,world" becomes "hello".

    token    -- raw piece of text produced by splitting a line
    notChars -- compiled pattern matching a single non-word character

    Returns the cleaned word, or "" when nothing is left.
    """
    word = token.lower()
    match = notChars.search(word)
    while word and match is not None:
        if match.start() == 0:
            #leading punctuation: drop it and look again
            word = word[match.end():]
        else:
            #keep only the text before the first non-word character
            word = word[:match.start()]
        match = notChars.search(word)
    return word


def createConcordance(inputFile, stopWords, outputFile):
    """Build a concordance of inputFile and write it to outputFile.

    Every word of inputFile that is not a stop word is recorded together
    with the number of each line it occurs on (lines are counted from 1; a
    word occurring twice on one line is recorded twice).  The words are
    then written one per line as "<word> - <str() of its line list>", in
    the order returned by the BST's inorder() traversal (intended to be
    alphabetical).

    inputFile  -- path of the text file to index
    stopWords  -- path of a file holding one stop word per line; reading
                  stops at the first blank line
    outputFile -- path of the file to write (opened in 'w' mode)
    """
    #declare data structures
    stopSet = HashSet(3500)
    dictionary = HashDictionary(10000)
    bst = BST()

    #declare regular expressions
    exprSpaces = re.compile(r'\s')
    dhyp = re.compile('--')
    notChars = re.compile(r'\W|-')

    #populate hashset with stop words
    with open(stopWords, 'r') as stopFile:
        for stop in stopFile:
            #the last line may have no trailing newline, so strip it
            #instead of searching for one
            stop = stop.rstrip('\n')
            if stop == "":
                #a blank line ends the stop word list
                break
            stopSet.add(stop)

    #open the input and process into words
    with open(inputFile, 'r') as f:
        for lineNum, line in enumerate(f, 1):
            #split on double hyphens first, then on whitespace
            for chunk in dhyp.split(line):
                for token in exprSpaces.split(chunk):
                    word = _stripToWord(token, notChars)
                    #skip empty tokens and stop words
                    if not word or stopSet.contains(word):
                        continue
                    #first sighting: register the word in both structures
                    if dictionary.search(word) is None:
                        dictionary.store(word, LinkedIndexedList())
                        bst.add(word)
                    dictionary.search(word).append(lineNum)

    #open output and use BST to print out words
    #   in alphabetical order
    with open(outputFile, 'w') as output:
        for item in bst.inorder():
            output.write(item + " - " + str(dictionary.search(item)) + "\n")
    def test_remove(self):
        """Remove every name from a three-element HashSet, one at a time.

        After each removal the size must drop by one and contains() must
        report False for exactly the names removed so far.
        """
        hs = HashSet(["Shane", "Alan", "Laurel"])
        assert hs.size == 3
        for name in ("Shane", "Alan", "Laurel"):
            assert hs.contains(name) == True

        removed = []
        removal_plan = (("Alan", 2), ("Shane", 1), ("Laurel", 0))
        for victim, expected_size in removal_plan:
            hs.remove(victim)
            removed.append(victim)
            assert hs.size == expected_size
            for name in ("Alan", "Shane", "Laurel"):
                assert hs.contains(name) == (name not in removed)
 def test_init_edge_cases(self):
     """Build a HashSet from unusual seed values: "", 1 and None.

     Only two of the three values are expected to be counted, and the
     empty string must be one of them.
     NOTE(review): presumably None is the value that is not stored --
     confirm against the HashSet constructor.
     """
     seed_values = ["", 1, None]
     edge_set = HashSet(seed_values)
     assert edge_set.size == 2
     assert edge_set.contains("") == True
 def test_filled_init(self):
     """A HashSet built from a list reports that list's size and members."""
     initial_names = ["Shane", "Alan"]
     filled = HashSet(initial_names)
     assert filled.size == 2
     for probe, expected in (("Alan", True), ("Bob", False)):
         assert filled.contains(probe) == expected