def checkPairs(text):
    """Print warnings about suspicious parentheses and typographic quotes.

    Two groups of patterns are searched in ``text`` through ``findRegEx``:
    parentheses that look unclosed, nested, or directly followed by another
    pair; and curly double/single quotes that are opened again before being
    closed, or closed and immediately reopened.

    Each hit is printed as: ``hit[0]``, a message, the matched text
    (``hit[2].group(0)``), and an empty replacement string. Nothing is
    returned.
    """
    parenthesis_patterns = (
        # Opening parenthesis followed by a sentence end before any ")".
        # NOTE(review): "(:?" looks like a typo for "(?:"; kept unchanged.
        r"\w+\s+(\([^)]+(:?\.\s+[A-Z]))",
        # Nested parentheses, or one pair directly followed by another.
        r"(\w+\s*\([^)]+\(\s*\w+|\w+\s*\)\s*\(\s*\w+)",
    )
    quote_patterns = (
        # Nested or directly subsequent double curly quotes.
        r"\s+(“[^”]+“|”\s*“)",
        # Nested or directly subsequent single curly quotes.
        r"\s+(‘[^’]+‘|’\s*‘)",
    )

    # TODO: check that the line is not code (no +-/= in the same line).
    parenthesis_hits = []
    for pattern in parenthesis_patterns:
        parenthesis_hits += findRegEx(pattern, text)
    for hit in parenthesis_hits:
        print(hit[0], "Possibly problem with parentheses: ", hit[2].group(0), "")

    quote_hits = []
    for pattern in quote_patterns:
        quote_hits += findRegEx(pattern, text)
    for hit in quote_hits:
        print(hit[0], "Possibly problem with quotes: ", hit[2].group(0), "")
def checkPlural(text):
    """Report plural-looking nouns that follow a singular determiner.

    Searches ``text`` (via ``findRegEx``) for "a", "an", "another", "each"
    or "every" followed by a word ending in "s" whose second-to-last
    character is not "u", "i", "s" or an apostrophe. For every hit a
    suggestion with the trailing "s" removed is built and printed.

    The suggestion used to be computed and then discarded, so this check
    never reported anything. It is now printed in the same four-field layout
    the other checks use: ``match[0]``, message, matched text, replacement.

    Nothing is returned.
    """
    matches = findRegEx(
        r"\s+(?:[Aa]n?|[Aa]nother|[Ee]ach|[Ee]very)\s+(\w+[^uis'’]s)\s+", text
    )
    for match in matches:
        found = match[2]
        noun = found.group(1)
        # Only the first occurrence of the noun inside the matched text is
        # shortened, so a repeated substring later in the match is untouched.
        replace = found.group(0).replace(noun, noun[:-1], 1)
        print(
            match[0],
            "A plural noun probably follows a singular determiner.",
            found.group(0),
            replace,
        )
def checkSplitInfinitve(text):
    """Print a warning for each "to <adverb> <word>" split infinitive.

    The adverb alternatives come from the module-level ``reAdv`` pattern and
    the following word must have at least four word characters. Hits whose
    following word appears in ``lstDeterminer``, ``lstAdpos``,
    ``lstConjunction`` or ``lstAdv`` are skipped. For the rest, the
    suggestion moves the adverb behind the word ("to <word> <adverb>").

    Each finding is printed as: ``hit[0]``, message, matched text,
    suggested replacement. Nothing is returned.
    """
    pattern = r"\sto (" + reAdv + r") (\w{4,})"
    for hit in findRegEx(pattern, text):
        found = hit[2]
        adverb = found.group(1)
        following_word = found.group(2)
        # Skip hits where the word after the adverb is a listed function
        # word or another adverb.
        if following_word in lstDeterminer + lstAdpos + lstConjunction + lstAdv:
            continue
        suggestion = "to " + following_word + " " + adverb
        print(
            hit[0],
            "An adverb probably splits an infinitive expression.",
            found.group(0),
            suggestion,
        )
def checkTeXmath(text):
    r"""Warn about function names written without a backslash in equations.

    Every ``equation`` environment found in ``text`` (via ``findRegEx``) is
    scanned for the first bare occurrence of sin, cos, tan, log, min, max or
    exp. A name counts as bare when it is at the very start of the equation
    body or directly preceded by whitespace, "(", ")" or "*".

    Returns:
        A string with one ' * WARN: ...' entry per offending equation,
        concatenated without separators; empty when nothing was found.
        ``match[0]`` is reported as the equation's line.
    """
    result = ""
    reFunction = r"(sin|cos|tan|log|min|max|exp)"
    # The captured equation body always starts at a non-whitespace character,
    # so without the "^" alternative a function name that opens the equation
    # (e.g. "sin(x) = 1") has no preceding character and would be missed.
    bare_function = re.compile(r"(?:^|\s|[\(\)\*])" + reFunction)
    matches = findRegEx(
        r"\\begin\{equation\}\s*\n\s*([^$]+?)\s*\n\s*\\end\{equation\}", text)
    for match in matches:
        fun = bare_function.search(match[2].group(1))
        if fun is not None:
            result += ' * WARN: use "\\{}" instead of "{}". (Equation ln. {})'.format(
                fun.group(1), fun.group(1), match[0])
    return result
def checkAbbreviations(text):
    """Report short and apparently unintroduced acronyms in ``text``.

    Known acronyms are ``lstAcronyms`` plus the keys of a dictionary passed
    to ``spelling.read_acronyms`` together with ``acronyms.md`` from
    ``DIC_DIR`` (presumably filled in place by that call; verify against the
    helper).

    Two passes are made:

    * Two-letter upper-case words that are not known are printed with a hint
      to write them out in full.
    * Upper-case words of three to five letters, not directly followed by
      "(" or an opening curly quote, are reported when they are not known
      and "(<ACRONYM>)" does not occur anywhere in ``text``.

    Each acronym is reported at most once, because it is added to the known
    list after its first report.

    Returns:
        A list of ``Correction`` objects, one per finding of the second
        pass. First-pass findings are only printed.
    """
    # TODO: check if an acronym was only used once => suspicious!
    dictionary = {}
    corrections = []
    spelling.read_acronyms(dictionary, DIC_DIR.joinpath("acronyms.md"))
    foundAbbreviations = lstAcronyms + list(dictionary.keys())

    matches = findRegEx(r"\s([A-Z][A-Z])\s", text)
    for match in matches:
        acronym = match[2].group(1)
        if acronym not in foundAbbreviations:
            print(
                match[0],
                "Found two character acronym, could be written in full words.",
                match[2].group(0),
                "",
            )
            foundAbbreviations.append(acronym)

    matches = findRegEx(r"\s([A-Z]{3,5})\s(?!\(|“)", text)
    desc = "Acronym '{}' was probably never introduced"
    for match in matches:
        acronym = match[2].group(1)
        if "(" + acronym + ")" not in text and acronym not in foundAbbreviations:
            # Use the captured acronym for the message: group(0) also holds
            # the surrounding whitespace (possibly newlines), which used to
            # end up inside the quotes of the message.
            message = desc.format(acronym)
            print("Line {}: {}.".format(match[0], message))
            foundAbbreviations.append(acronym)
            corrections.append(
                Correction(
                    match[0],
                    match[1],
                    # The full matched text is passed on unchanged.
                    match[2].group(0),
                    "?",
                    message,
                )
            )
    return corrections
def checkTeXheadings(text):
    r"""Print headings that contain a long word starting in lower case.

    For each of \title, \chapter, \section, \subsection, \subsubsection and
    \paragraph, the text inside the braces is split with ``split2words``.
    A heading is reported once if any word longer than three characters
    starts with a lower-case letter and is not listed in ``lstAdpos``,
    ``lstDet`` or ``lstConjunction``.

    Nothing is returned.
    """
    heading_commands = (
        "title",
        "chapter",
        "section",
        "subsection",
        "subsubsection",
        "paragraph",
    )
    for command in heading_commands:
        pattern = r"\\" + command + r"\{([^}]*?)\}"
        for hit in findRegEx(pattern, text):
            heading_text = hit[2].group(1)
            # NOTE(review): this block reads lstDet while checkSplitInfinitve
            # reads lstDeterminer; confirm both names exist at module level.
            has_lowercase_word = any(
                len(word) > 3
                and word[0].islower()
                and word not in lstAdpos + lstDet + lstConjunction
                for word in split2words(heading_text)
            )
            if has_lowercase_word:
                print("Lowercase letter in heading: ", heading_text)