def readFile(self, url):
    """Read a dated journal file and record per-day statistics on ``self``.

    A line that starts with a date of the form ``D-M-YY`` (optionally
    surrounded by whitespace) opens a new day.  Each line belonging to a day
    is passed to ``self.addLine`` and the day's totals are recorded when the
    next date, or the end of the file, is reached.

    Writes to ``self``:
      * ``dayEntryHashTable[date]``      -- ``hashlib.md5`` object of that
        day's raw lines (date line included).
      * ``wordCountOfEntriesDict[date]`` -- number of words counted that day.
      * ``mostRecentDate`` / ``firstDate`` -- widened to cover every date seen.
      * ``totalNumberOfWords``           -- sum of words over all days.

    Args:
        url: Path of the file to read.  If it cannot be opened the user is
            prompted for another path and the read is retried with it.

    Returns:
        None.
    """
    try:
        f = open(url, 'r')
    except (OSError, TypeError, ValueError):
        # Only path/IO problems trigger the retry; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        print('File not found')
        newPath = input('Enter new path > ')
        # TODO (carried over from the original): the author noted that this
        # retry "doesn't work for entirely unknown reasons"; the cause is not
        # visible from inside this method.
        return self.readFile(newPath)

    newdate = re.compile(r'\s*([0-9]{1,2}-[0-9]{1,2}-[0-9]{2})\s*')
    currentDateStr = None
    currentDateObj = None
    numWords = 0
    totalWordNum = 0
    namesFound = set()
    currentDayLines = []  # raw lines of the current day, hashed when the day ends

    # ``with`` guarantees the handle is closed even if a helper raises.
    with f:
        for rawLine in f:
            line = rawLine
            if self.prefs.GUESS_NAMES:
                self.guessNames(line)

            # A line that starts with a date begins a new day.
            dateFound = newdate.match(line)
            if dateFound is not None:
                # Wrap up the previous day before moving on to the new one.
                self.addRelatedNames(namesFound)
                namesFound = set()
                if currentDateObj is not None:
                    # Skipped for the first date: there is no previous day,
                    # so nothing must be stored under a None key.
                    self.dayEntryHashTable[currentDateObj] = hashlib.md5(
                        ''.join(currentDayLines).encode())
                    if numWords > 0:
                        self.wordCountOfEntriesDict[currentDateObj] = numWords
                totalWordNum += numWords
                numWords = 0
                currentDayLines = []  # each day's hash covers only its own lines

                currentDateStr = Helper.formatDateStringIntoCleanedString(
                    dateFound.group(0))
                currentDateObj = Helper.makeDateObject(currentDateStr)
                if currentDateObj > self.mostRecentDate:
                    self.mostRecentDate = currentDateObj
                if currentDateObj < self.firstDate:
                    self.firstDate = currentDateObj

                # Drop exactly the matched date text so it is not counted as
                # a word.  The cleaned string may differ in length from what
                # was matched (surrounding whitespace, any reformatting done
                # by Helper), so its length is not a safe offset.
                line = line[dateFound.end():]

            # Lines before the first date belong to no day and are skipped.
            if currentDateStr is not None:
                currentDayLines.append(rawLine)
                wordsFound, namesFoundThisLine = self.addLine(line, currentDateObj)
                namesFound.update(namesFoundThisLine)
                numWords += wordsFound

    # The last day has no following date line to trigger the wrap-up above.
    if currentDateObj is not None:
        self.addRelatedNames(namesFound)
        self.dayEntryHashTable[currentDateObj] = hashlib.md5(
            ''.join(currentDayLines).encode())
        self.wordCountOfEntriesDict[currentDateObj] = numWords
    self.totalNumberOfWords = totalWordNum + numWords
import argparse
import sys
import unittest
from datetime import datetime
from io import StringIO

from Helper import Helper
from WordDict import WordDict
from WordFrequenciesClass import WordFrequencies

# Single date object passed for both date arguments of every addWord call.
date = Helper.makeDateObject('10-12-17')


class TestUM(unittest.TestCase):
    """Tests for WordDict word counts and first-occurrence lookup."""

    def setUp(self):
        """Build a fresh WordDict for every test.

        test_incrementCount mutates the dictionary; with a fixture shared at
        class level, test_addWordNoConflicts would only pass when it happened
        to run first.
        """
        self.wd = WordDict()
        for word in ('word1', 'word2', 'word3'):
            self.wd.addWord(word, 1, date, date, False)

    def test_addWordNoConflicts(self):
        """A newly added word reports the count and date it was added with."""
        self.assertEqual(self.wd.getCount('word1'), 1)
        self.assertEqual(self.wd.getFirstOccurrence('word1'), date)

    def test_incrementCount(self):
        """incrementCount raises a word's count by one."""
        self.wd.incrementCount('word1')
        self.assertEqual(self.wd.getCount('word1'), 2)