def readFile(self, url):
    """Read the journal file at ``url`` and record per-day statistics on ``self``.

    The file is scanned line by line. A line that starts with a date of the
    form ``M-D-YY`` (optionally surrounded by whitespace) begins a new day;
    every line from there until the next date line belongs to that day.
    Lines that appear before the first date are not counted.

    Updates made on ``self``:
      * ``wordCountOfEntriesDict[date]`` -- number of words reported by
        ``self.addLine`` for that day.
      * ``dayEntryHashTable[date]`` -- ``hashlib.md5`` object built from the
        raw text of that day (date line included).
      * ``mostRecentDate`` / ``firstDate`` -- widened to include every date seen.
      * ``totalNumberOfWords`` -- sum of the word counts of all days.
      * ``self.addRelatedNames`` is called with the set of names found in
        each day; ``self.guessNames`` is called on every line when
        ``self.prefs.GUESS_NAMES`` is truthy.

    If the file cannot be opened, the user is prompted for another path and
    the read is retried with it.

    Args:
        url: Path of the file to read.

    Returns:
        None.
    """
    try:
        f = open(url, 'r')
    except OSError:
        # Only I/O failures (missing file, permissions, ...) trigger the
        # re-prompt; a bare ``except`` would also swallow Ctrl-C.
        print('File not found')
        newPath = input('Enter new path > ')
        return self.readFile(newPath)

    # NOTE(review): the original carried "TODO: this doesn't work for entirely
    # unknown reasons" at this point; it is unclear which statement it meant.
    # Raw string so that ``\s`` reaches the regex engine untouched.
    newdate = re.compile(r'\s*([0-9]{1,2}-[0-9]{1,2}-[0-9]{2})\s*')

    currentDateStr = None
    currentDateObj = None
    numWords = 0            # words counted so far in the current day
    namesFound = set()      # names seen so far in the current day
    totalWordNum = 0        # words in all days that are already wrapped up
    currentDayLines = []    # raw lines of the current day, hashed at wrap-up

    # ``with`` guarantees the file is closed even if parsing raises.
    with f:
        for line in f:
            if self.prefs.GUESS_NAMES:
                self.guessNames(line)

            rawLine = line  # keep the untouched text for the day's hash

            # A line that starts with a date opens a new day.
            dateFound = newdate.match(line)
            if dateFound is not None:
                # Wrap up the previous day before switching to the new one.
                self.addRelatedNames(namesFound)
                namesFound = set()
                if currentDateObj is not None:
                    # Nothing to hash before the first date has been seen.
                    self.dayEntryHashTable[currentDateObj] = hashlib.md5(
                        ''.join(currentDayLines).encode())
                currentDayLines = []
                if numWords > 0:
                    self.wordCountOfEntriesDict[currentDateObj] = numWords
                totalWordNum += numWords
                numWords = 0

                currentDateStr = dateFound.group(0)
                currentDateStr = Helper.formatDateStringIntoCleanedString(currentDateStr)
                currentDateObj = Helper.makeDateObject(currentDateStr)
                if currentDateObj > self.mostRecentDate:
                    self.mostRecentDate = currentDateObj
                if currentDateObj < self.firstDate:
                    self.firstDate = currentDateObj

                # Drop the date from the line so it is not counted as a word.
                # NOTE(review): this slices by the length of the *cleaned*
                # date string, not of the matched text -- confirm the two
                # always have the same length.
                line = line[len(currentDateStr):]

            if currentDateStr is not None:
                currentDayLines.append(rawLine)
                wordsFound, namesFoundThisLine = self.addLine(line, currentDateObj)
                namesFound.update(namesFoundThisLine)
                numWords += wordsFound

    # The last day has no following date line to trigger its wrap-up.
    if currentDateObj is not None:
        self.addRelatedNames(namesFound)
        self.dayEntryHashTable[currentDateObj] = hashlib.md5(
            ''.join(currentDayLines).encode())
        self.wordCountOfEntriesDict[currentDateObj] = numWords
    self.totalNumberOfWords = totalWordNum + numWords