def test_Add3(self):
    """
    Adding a map whose keys are disjoint from the receiver's keys must grow the receiver by exactly
    the number of added keys.

    The first map holds the keys 0..999 (each put once); the second holds the keys 1500..1999 with
    counts 501..1000. After the addition the first map is expected to contain 1500 keys.
    """
    counterHashMap1 = CounterHashMap()
    for i in range(1000):
        counterHashMap1.put(i)
    counterHashMap2 = CounterHashMap()
    for i in range(500, 1000):
        counterHashMap2.putNTimes(1000 + i, i + 1)
    counterHashMap1.add(counterHashMap2)
    # assertEqual: the assertEquals alias is deprecated and was removed in Python 3.12
    self.assertEqual(1500, len(counterHashMap1))
def test_Add2(self):
    """
    Adding a map that shares only some keys with the receiver must sum the counts of shared keys and
    bring over the keys that were missing.

    Before the addition the first map holds item1 x3 and item2 x2; the second holds item4 x1,
    item5 x4 and item2 x1. Expected counts afterwards: item1 = 3, item2 = 3, item4 = 1, item5 = 4.
    """
    counterHashMap1 = CounterHashMap()
    counterHashMap1.put("item1")
    counterHashMap1.put("item2")
    counterHashMap1.put("item1")
    counterHashMap1.put("item2")
    counterHashMap1.put("item1")
    counterHashMap2 = CounterHashMap()
    counterHashMap2.put("item4")
    counterHashMap2.putNTimes("item5", 4)
    counterHashMap2.put("item2")
    counterHashMap1.add(counterHashMap2)
    # assertEqual: the assertEquals alias is deprecated and was removed in Python 3.12
    self.assertEqual(3, counterHashMap1.count("item1"))
    self.assertEqual(3, counterHashMap1.count("item2"))
    self.assertEqual(1, counterHashMap1.count("item4"))
    self.assertEqual(4, counterHashMap1.count("item5"))
def countWords(self, wordCounter: CounterHashMap, height: int):
    """
    Collects symbols and their counts into wordCounter by descending height levels from this node.

    While height is non-zero the call is forwarded to every child with height reduced by one; once
    height reaches zero the node adds its own symbol to wordCounter as many times as its own count.

    PARAMETERS
    ----------
    wordCounter : CounterHashMap
        word counter keeping symbols and their counts; it is updated in place.
    height : int
        height for NGram. If height = 1, N-Gram is treated as UniGram, if height = 2, N-Gram is treated
        as Bigram, etc.
    """
    if height != 0:
        # Target depth not reached yet: delegate to each subtree, one level closer.
        for subtree in self.__children.values():
            subtree.countWords(wordCounter, height - 1)
        return
    # Target depth reached: record this node's symbol with its stored count.
    wordCounter.putNTimes(self.__symbol, self.__count)
def __init__(self, fileName=None):
    """
    Constructor of RootWordStatistics class that generates a new map for statistics and, when a file
    name is given, fills that map from the file.

    The file is opened as UTF-8 text and parsed as follows: the first line holds the number of root
    word entries. Each entry starts with a line of whitespace-separated fields whose first field is
    the root word and whose second field is the number of lines that follow for that entry. Each of
    those following lines has a key as its first field and an integer count as its second field,
    which are stored in a CounterHashMap via putNTimes. The map built for an entry is stored under
    its root word.

    PARAMETERS
    ----------
    fileName : str
        Name of the statistics file to read. If None (the default), the statistics map stays empty.
    """
    self.__statistics = {}
    if fileName is not None:
        # The context manager guarantees the file is closed even when a malformed line makes
        # parsing raise (e.g. int() on a non-numeric field or a line with too few fields).
        with open(fileName, encoding="utf8") as inputFile:
            size = int(inputFile.readline().strip())
            for _ in range(size):
                items = inputFile.readline().strip().split()
                rootWord = items[0]
                count = int(items[1])
                wordMap = CounterHashMap()
                for _ in range(count):
                    items = inputFile.readline().strip().split()
                    wordMap.putNTimes(items[0], int(items[1]))
                self.__statistics[rootWord] = wordMap
def test_PutNTimes1(self):
    """
    Repeated putNTimes calls on the same key must accumulate.

    item1 receives 2 + 2 + 2, item2 receives 3 + 3 and item3 receives 6 in a single call, so all
    three keys are expected to end up with a count of 6.
    """
    counterHashMap = CounterHashMap()
    counterHashMap.putNTimes("item1", 2)
    counterHashMap.putNTimes("item2", 3)
    counterHashMap.putNTimes("item3", 6)
    counterHashMap.putNTimes("item1", 2)
    counterHashMap.putNTimes("item2", 3)
    counterHashMap.putNTimes("item1", 2)
    # assertEqual: the assertEquals alias is deprecated and was removed in Python 3.12
    self.assertEqual(6, counterHashMap.count("item1"))
    self.assertEqual(6, counterHashMap.count("item2"))
    self.assertEqual(6, counterHashMap.count("item3"))
def test_Add1(self):
    """
    Adding a map that has exactly the same keys as the receiver must sum the counts key by key.

    Before the addition the first map holds item1 x3, item2 x2 and item3 x1; the second holds a
    count of 6 for each of the three keys. Expected counts afterwards: item1 = 9, item2 = 8,
    item3 = 7.
    """
    counterHashMap1 = CounterHashMap()
    counterHashMap1.put("item1")
    counterHashMap1.put("item2")
    counterHashMap1.put("item3")
    counterHashMap1.put("item1")
    counterHashMap1.put("item2")
    counterHashMap1.put("item1")
    counterHashMap2 = CounterHashMap()
    counterHashMap2.putNTimes("item1", 2)
    counterHashMap2.putNTimes("item2", 3)
    counterHashMap2.putNTimes("item3", 6)
    counterHashMap2.putNTimes("item1", 2)
    counterHashMap2.putNTimes("item2", 3)
    counterHashMap2.putNTimes("item1", 2)
    counterHashMap1.add(counterHashMap2)
    # assertEqual: the assertEquals alias is deprecated and was removed in Python 3.12
    self.assertEqual(9, counterHashMap1.count("item1"))
    self.assertEqual(8, counterHashMap1.count("item2"))
    self.assertEqual(7, counterHashMap1.count("item3"))
def test_PutNTimes2(self):
    """
    The total of all counts must equal the total of all amounts inserted, whichever keys they
    landed on.

    The amounts 1..1000 are inserted under randomly chosen keys in 0..999; keys may repeat, but the
    expected sum of counts is always 1 + 2 + ... + 1000 = 500500.
    """
    counterHashMap = CounterHashMap()
    for i in range(1000):
        counterHashMap.putNTimes(randrange(1000), i + 1)
    # assertEqual: the assertEquals alias is deprecated and was removed in Python 3.12
    self.assertEqual(500500, counterHashMap.sumOfCounts())
def test_TopN2(self):
    """
    Checks the last entry returned by topN for N = 10 and N = 100.

    Each key i in 0..999 is stored with the count 2 * i + 2, so larger keys have larger counts.
    The element at index 1 of the 10th entry of topN(10) is expected to be 990 and that of the
    100th entry of topN(100) is expected to be 900.
    NOTE(review): this presumably means topN yields (count, key) pairs ordered by descending
    count - confirm against CounterHashMap.topN.
    """
    counterHashMap = CounterHashMap()
    for i in range(1000):
        counterHashMap.putNTimes(i, 2 * i + 2)
    # assertEqual: the assertEquals alias is deprecated and was removed in Python 3.12
    self.assertEqual(990, counterHashMap.topN(10)[9][1])
    self.assertEqual(900, counterHashMap.topN(100)[99][1])