def fixPunctuation(self):
    """Clean up punctuation in the text of every run of ``self.document2``.

    Unless ``self.gettingSize`` is set, ``self.progress`` is first updated
    to the "Fixing punctuation..." status message.  Each run's text is then
    passed through a fixed, ordered chain of ``regexlib.removeSub`` /
    ``regexlib.replaceSub`` calls and written back to ``run.text``, and
    ``self.step`` is incremented.

    ``regexlib`` is a project helper module; presumably ``removeSub`` deletes
    and ``replaceSub`` substitutes every occurrence of the given substring
    (verify against its implementation).

    NOTE(review): this method was previously defined twice with identical
    logic; the later definition shadowed the earlier one.  The nesting below
    (guard covering only the progress message, one ``self.step`` increment
    per run) was reconstructed from whitespace-collapsed source -- confirm.
    """
    if not self.gettingSize:
        self.progress = "Fixing punctuation..."

    for paragraph in self.document2.paragraphs:
        for run in paragraph.runs:
            text = run.text

            # Hyphens: drop a space-preceded hyphen, and close the gap
            # after a hyphen that is followed by a space.
            text = regexlib.removeSub(text, " -")
            text = regexlib.replaceSub(text, "- ", "-")

            # Misc: "/" becomes a capital "I" (presumably an OCR misread --
            # confirm); backslashes and carets are dropped.
            text = regexlib.replaceSub(text, "/", "I")
            text = regexlib.removeSub(text, "\\")
            text = regexlib.removeSub(text, "^")

            # Asterisks: park the "* * * *" marker as "& & & &" so it
            # survives the removal of stray asterisks, then restore it.
            # Order matters: any "& & & &" already in the text also comes
            # out as "* * * *".
            text = regexlib.replaceSub(text, "* * * *", "& & & &")
            text = regexlib.removeSub(text, "*")
            text = regexlib.replaceSub(text, "& & & &", "* * * *")

            # Periods and spacing.
            text = regexlib.replaceSub(text, "# .#", "# #")
            # NOTE(review): pattern and replacement look identical here;
            # presumably the pattern was a double (or non-breaking) space
            # before whitespace was collapsed -- confirm.
            text = regexlib.replaceSub(text, " ", " ")
            text = regexlib.replaceSub(text, "“ ‘", "“‘")

            run.text = text
            self.step += 1
def removeSymbols(self):
    """Strip unwanted symbols from the text of every run of ``self.document2``.

    Unless ``self.gettingSize`` is set, ``self.progress`` is first updated
    to the "Removing symbols..." status message.  ``self.step`` is reset to
    ``0.0`` and then incremented once per processed run.  For each run, em
    dashes are replaced with ``---`` and a fixed list of symbols is passed
    to ``regexlib.removeSub`` before the result is written back to
    ``run.text``.

    ``regexlib`` is a project helper module; presumably ``removeSub`` deletes
    every occurrence of the given substring (verify against its
    implementation).

    NOTE(review): this method was previously defined twice with identical
    logic; the later definition shadowed the earlier one.  The nesting below
    (guard covering only the progress message) was reconstructed from
    whitespace-collapsed source -- confirm.
    """
    if not self.gettingSize:
        self.progress = "Removing symbols..."

    document2 = self.document2
    self.step = 0.0

    for paragraph in document2.paragraphs:
        for run in paragraph.runs:
            text = run.text

            # Em dash -> three hyphens (plain str.replace, not regexlib).
            text = text.replace(u"—", "---")

            text = regexlib.removeSub(text, "»")
            text = regexlib.removeSub(text, "|")
            text = regexlib.removeSub(text, "«")
            text = regexlib.removeSub(text, "•")
            # NOTE(review): this literal reads as a plain space, which would
            # strip every space from the run; presumably it was a
            # non-breaking or other special space character -- confirm.
            text = regexlib.removeSub(text, " ")
            text = regexlib.removeSub(text, "_")
            text = regexlib.removeSub(text, "■")

            # The step counter is advanced before the text is written back.
            self.step += 1
            run.text = text