def getMarkUnderWord(self, displayName, last20Words, wasPluralWithApostrophe):
    """Interactively resolve a name occurrence to a first/last name and build its markup.

    Prints the context words and the cleaned display name. If the display
    name was previously pinned (the user answered with a trailing "!"), the
    stored first/last name is reused without prompting. Otherwise the user is
    asked on stdin to confirm or correct the first name and to pick or type a
    last name.

    Args:
        displayName: The name as it appeared in the text; must be a str.
        last20Words: Iterable of str printed as context before the prompts.
        wasPluralWithApostrophe: Passed through unchanged to
            WordClass.addNameWithMarkupPieces.

    Returns:
        WordClass.addWordOrMarkup(<uncleaned word>) when the user answers
        "None"/"none" for the first name (i.e. the word is not a name);
        otherwise WordClass.addNameWithMarkupPieces(...) for the resolved name.

    Side effects:
        Reads stdin, writes stdout, and may add entries to
        self.uniqueDisplayNamesToNameDict and self.lastNamesForFirstNameDict.
    """
    assert isinstance(displayName, str)
    originalWord = displayName  # needed when the name isn't actually a name
    displayName = Helper.cleanWord(displayName, True)

    # Show the surrounding words so the user can tell who is meant.
    print('\n\n\n')
    for contextWord in last20Words:
        print(contextWord + ' ', end='')
    print('\n' + displayName + ':')

    if displayName in self.uniqueDisplayNamesToNameDict:
        # We've specified to give the same markup to all these display names.
        pinnedName = self.uniqueDisplayNamesToNameDict[displayName]
        return WordClass.addNameWithMarkupPieces(
            displayName, pinnedName[0], pinnedName[1], wasPluralWithApostrophe)

    # --- first name ---
    print('Is this the proper first name for ' + displayName + '? [enter] for yes, [n] for no')
    if input('>') == 'n':
        print('Enter proper first name (or enter "None" if this is not a name)')
        firstName = input('>')
        if firstName in ('None', 'none'):
            # Not actually a name: fall back to a plain word.
            return WordClass.addWordOrMarkup(originalWord)
    else:
        firstName = displayName

    # --- last name ---
    try:
        knownLastNames = self.lastNamesForFirstNameDict[firstName]
    except KeyError:
        knownLastNames = None

    if knownLastNames is None:
        print('Type last name (append "!" at end to auto assign all instance of this name to this last name):')
    else:
        for index, knownLastName in enumerate(knownLastNames):
            print(str(index) + ': ' + knownLastName)
        print('Or type new last name (append "!" at end to auto assign all instance of this name to this last name):')

    # The answer is either the number of a listed choice or a last name typed directly.
    choice = input('>')
    lastName = choice
    for index, knownLastName in enumerate(knownLastNames or ()):
        if choice == str(index):
            lastName = knownLastName
            break

    # endswith() instead of lastName[-1]: an empty answer must not raise IndexError.
    if lastName.endswith('!'):
        # All instances of this display name get this last name, without asking again.
        lastName = lastName[:-1]
        self.uniqueDisplayNamesToNameDict[displayName] = (firstName, lastName)

    # Remember the last name so it is offered as a numbered choice next time.
    if knownLastNames is None:
        self.lastNamesForFirstNameDict[firstName] = [lastName]
    elif lastName not in knownLastNames:
        knownLastNames.append(lastName)

    return WordClass.addNameWithMarkupPieces(displayName, firstName, lastName, wasPluralWithApostrophe)
def readFile(self, url):
    """Read a text file word by word and append its marked-up form to the markup file.

    If `url` cannot be opened, the user is asked on stdin for another path
    until one opens. Every word is split into leading stuff, the word, and
    trailing stuff by Helper.cleanWordForInitialAdd. Words whose cleaned form
    is in self.namesSet are resolved interactively via self.getMarkUnderWord;
    all other words go through WordClass.addWordOrMarkup. The result is
    appended to self.markUpFilePath.

    Args:
        url: Path of the text file to read.

    Returns:
        None.
    """
    # Keep asking until a path can be opened (a loop, so repeated typos do
    # not grow the call stack).
    while True:
        try:
            f = open(url, 'r')
            break
        except OSError:
            print('File not found')
            url = input('Enter new path > ')

    # Smart quotes are normalised to plain apostrophes so "'s" can be detected.
    smartQuoteTable = str.maketrans({'‘': "'", '’': "'"})

    with f:
        # Separate this run's output from whatever the markup file already holds.
        with open(self.markUpFilePath, 'a') as markupFile:
            markupFile.write('\n\n\n')

        for line in f:
            # Context shown to the user for a name: a rolling window of at most
            # 20 words ending in the name itself. It restarts on every line.
            last20Words = []
            # The markup file is reopened per line so completed lines are on
            # disk even if the interactive session is aborted later.
            with open(self.markUpFilePath, 'a') as markupFile:
                for word_str in line.split(' '):
                    if len(last20Words) >= 20:
                        last20Words.pop(0)
                    last20Words.append(word_str)

                    (word_beforeStuff, word_str, word_afterStuff) = Helper.cleanWordForInitialAdd(word_str)
                    if Helper.cleanWord(word_str, stripApostropheS=True) in self.namesSet:
                        wasPluralWithApostrophe = False
                        word_str = word_str.translate(smartQuoteTable)
                        if word_str.endswith("'s"):
                            word_str = word_str[:-2]
                            wasPluralWithApostrophe = True
                        word_class = self.getMarkUnderWord(word_str, last20Words, wasPluralWithApostrophe)
                    else:
                        word_class = WordClass.addWordOrMarkup(word_str)

                    # A space is added manually since the split removed them.
                    # TODO: add spaces back only where they were taken from
                    markupFile.write(word_beforeStuff + word_class.printMarkup() + word_afterStuff + ' ')
def addLine(self, line, currentDate):
    """Tally the words and names of one line of text into the running statistics.

    The line is split on single spaces. Empty and whitespace-only pieces are
    skipped, as are words rejected by Helper.valid. Each remaining word
    updates the word counters, and, when it is a recognised name, the name
    counters as well.

    Args:
        line: The text to process.
        currentDate: Date value stored with every counter that is updated.

    Returns:
        A tuple (wordsToCount, namesFound): the number of words that passed
        Helper.valid, and the set of cleaned words that were counted as names.
    """
    whitespaceOnly = re.compile(r'^\s+$')  # compiled once, not once per word
    wordsToCount = 0  # invalid words are excluded from the word count. TODO: rethink this?
    namesFound = set()

    for word in line.split(' '):
        if not word or whitespaceOnly.search(word) is not None:
            continue

        # Only the middle element (the word itself) is needed for counting.
        (_, word, _) = Helper.cleanWordForInitialAdd(word)
        # Words are represented by WordClass, which encapsulates both normal
        # words and markup names in one object.
        word = WordClass(word)
        if self.prefs.COMBINE_PLURALS:
            if word.endswith("'s"):
                word = WordClass.addWordOrMarkup(word.toString()[:len(word)-2])  # TODO: this is broken

        wasUpper = word.toString()[:1].isupper()
        # Strips off all punctuation and other information that is only wanted in markup.
        word = Helper.cleanWord(word)
        if not Helper.valid(word):
            continue
        wordsToCount += 1

        # --- names ---
        # NOTE(review): as written, a name is only counted when
        # REQUIRE_CAPS_FOR_NAMES is true AND the word was capitalised; with the
        # preference off no names are counted. Confirm that is intended.
        if word in self.namesSet and (Preferences.REQUIRE_CAPS_FOR_NAMES and wasUpper):
            namesFound.add(word)

            # total occurrences: (count, date last seen)
            try:
                self.namesDict[word] = (self.namesDict[word][0] + 1, currentDate)
            except KeyError:
                self.namesDict[word] = (1, currentDate)

            # names per day: incremented at most once per distinct date
            try:
                if self.namesPerDayDict[word][1] != currentDate:
                    self.namesPerDayDict[word] = (self.namesPerDayDict[word][0] + 1, currentDate)
            except KeyError:
                self.namesPerDayDict[word] = (1, currentDate)

            # names for graphing purposes: { word : [ [ date , count ] ] }
            try:
                history = self.namesToGraphDict[word]
                latest = history[-1]
            except (KeyError, IndexError):
                # this name hasn't been encountered yet
                self.namesToGraphDict[word] = [[currentDate, 1]]
            else:
                if latest[0] == currentDate:
                    latest[1] += 1
                else:
                    # start a new [date, count] entry for the new date
                    history.append([currentDate, 1])

            # names for graph, one entry per occurrence: { word : [ date ] }
            try:
                self.namesToGraphDictUniqueOccurences[word].append(currentDate)
            except KeyError:
                self.namesToGraphDictUniqueOccurences[word] = [currentDate]

        # --- words ---
        if self.wordDict.exists(word):
            self.wordDict.addOrReplaceWord(
                word,
                self.wordDict.getCount(word) + 1,
                currentDate,
                self.wordDict.getFirstOccurrence(word),
                wasUpper)
        else:
            self.wordDict.addWord(word, 1, currentDate, currentDate, wasUpper)  # TODO: wasUpper wasn't there originally

        # --- words per day ---
        if self.wordsPerDayDict.exists(word):
            # TODO: was addOrReplaceWord, need to think what it should be
            self.wordsPerDayDict.addWord(word, self.wordsPerDayDict.getCount(word), currentDate)
        else:
            # The original called the container itself, self.wordsPerDayDict(...);
            # addWord matches the branch above.
            self.wordsPerDayDict.addWord(word, 1, currentDate)

        # TODO: mark-under output is being moved to its own class to be called separately

    return (wordsToCount, namesFound)