if data.find(weird_char) > -1: data = data.replace(weird_char, '') pattern = re.compile(item['re']) groups = pattern.findall(data) lines = [] for group in groups: if item['csv'] in ('categories', ): group = [x for x in group if x != ''] if len(group) == 1: category = ''.join(group) continue else: path = ''.join(group) lines.append('%s,%s' % (Utils.CsvEscape(str(Utils.HtmlUnescape(category))), Utils.CsvEscape(str(Utils.HtmlUnescape(path))))) elif item['csv'] in ('countries', 'currencies'): lines.append('%s,%s' % (Utils.CsvEscape(str(Utils.HtmlUnescape(group[0]))), Utils.CsvEscape(str(Utils.HtmlUnescape(group[1]))))) elif item['csv'] in ('error_codes', ): pattern = re.compile('<.*?>') message = list(group)[1] message = pattern.sub('', message) lines.append('%s,%s' % (Utils.CsvEscape(str(Utils.HtmlUnescape(group[0]))), Utils.CsvEscape(str(Utils.HtmlUnescape(message))))) elif item['csv'] in ('languages', ): # Convert '-' into ''. new_group = []