def preProcess(self, column):
    """
    Clean a raw column value so that superficial differences are ignored.

    The value is first passed through ``dedupe.asciiDammit`` (presumably an
    ASCII conversion -- confirm against the dedupe library). Newlines are
    then turned into spaces, runs of spaces are collapsed to one space,
    surrounding whitespace and quote characters are stripped, and the
    result is lower-cased.

    :param column: the raw text of a single field.
    :returns: the cleaned, lower-cased value.
    """
    column = dedupe.asciiDammit(column)
    # Replace newlines *before* collapsing runs of spaces; doing it the
    # other way round leaves a double space for input such as "a \nb".
    column = re.sub('\n', ' ', column)
    column = re.sub(' +', ' ', column)
    # Strip outer whitespace, then double quotes, then single quotes, and
    # strip once more to drop whitespace that was inside the quotes.
    column = column.strip().strip('"').strip("'").lower().strip()
    return column
def preProcess(column):
    """
    Do a little bit of data cleaning with the help of
    [AsciiDammit](https://github.com/tnajdek/ASCII--Dammit) and Regex.
    Things like casing, extra spaces, quotes and new lines can be ignored.

    Newlines are turned into spaces, runs of spaces are collapsed to one
    space, surrounding whitespace and quote characters are stripped, and
    the result is lower-cased.

    :param column: the raw text of a single field.
    :returns: the cleaned, lower-cased value.
    """
    column = dedupe.asciiDammit(column)
    # Replace newlines *before* collapsing runs of spaces; doing it the
    # other way round leaves a double space for input such as "a \nb".
    column = re.sub('\n', ' ', column)
    column = re.sub(' +', ' ', column)
    # Strip outer whitespace, then double quotes, then single quotes, and
    # strip once more to drop whitespace that was inside the quotes.
    column = column.strip().strip('"').strip("'").lower().strip()
    return column