def cleanup(input_string):
    """
    Blank out separator characters, trim the result and squeeze repeated spaces.

    Every entry of the module-level ``clean_chars`` is replaced by a single
    space; characters that are not listed there are left untouched.

    :param input_string: text to clean
    :type input_string: str
    :return: the text with ``clean_chars`` blanked, passed through ``strip``
        and with runs of spaces reduced to one space
    :rtype: str
    """
    cleaned = input_string
    for separator in clean_chars:
        cleaned = cleaned.replace(separator, ' ')

    # ``strip`` is the module-level helper, called with its default arguments.
    trimmed = strip(cleaned)
    return re.sub(' +', ' ', trimmed)
def cleanup(input_string):
    """
    Blank out separator characters, trim the result and squeeze repeated spaces.

    Separators that appear to sit between single characters are restored from
    the input, so that acronyms such as ``S.H.I.E.L.D.`` keep their dots
    (see https://github.com/guessit-io/guessit/issues/278).

    :param input_string: text to clean
    :type input_string: str
    :return: cleaned text
    :rtype: str
    """
    cleaned = input_string
    for separator in clean_chars:
        cleaned = cleaned.replace(separator, ' ')

    # Positions holding a separator after blanking which, judged on the
    # *original* text, pass both the "before" and "after" checks.
    # NOTE(review): assumes _potential_before/_potential_after are pure.
    candidates = [
        pos for pos, char in enumerate(cleaned)
        if char in seps
        and _potential_before(pos, input_string)
        and _potential_after(pos, input_string)
    ]

    # Only restore a candidate when another candidate lies exactly two
    # positions away, i.e. the separators alternate with single characters.
    to_restore = [
        pos for pos in candidates
        if pos - 2 in candidates or pos + 2 in candidates
    ]

    kept_separators = set()
    if to_restore:
        chars = list(cleaned)
        for pos in to_restore:
            original_char = input_string[pos]
            kept_separators.add(original_char)
            chars[pos] = original_char
        cleaned = ''.join(chars)

    # Restored separators must survive the edge strip (e.g. a trailing dot).
    strippable = ''.join(sep for sep in seps if sep not in kept_separators)
    cleaned = strip(cleaned, strippable)

    return re.sub(' +', ' ', cleaned)
def cleanup(input_string):
    """
    Replace ``clean_chars`` with spaces, strip separators and collapse spaces.

    When separators alternate with what look like single characters (for
    example ``S.H.I.E.L.D.``), the original separator is put back instead of
    the space, and that separator is excluded from the final strip.
    See https://github.com/guessit-io/guessit/issues/278.

    :param input_string: text to clean
    :type input_string: str
    :return: cleaned text
    :rtype: str
    """
    spaced = input_string
    for unwanted in clean_chars:
        spaced = spaced.replace(unwanted, ' ')

    # Stage 1: every position that holds a separator once blanking is done.
    sep_positions = [pos for pos, char in enumerate(spaced) if char in seps]

    # Stage 2: keep those accepted by both neighbour checks, which inspect
    # the untouched input rather than the blanked copy.
    candidate_positions = [
        pos for pos in sep_positions
        if _potential_before(pos, input_string) and _potential_after(pos, input_string)
    ]

    # Stage 3: a lone candidate is not enough; it needs a sibling two
    # positions to the left or to the right.
    restorable = [
        pos for pos in candidate_positions
        if pos - 2 in candidate_positions or pos + 2 in candidate_positions
    ]

    restored_seps = set()
    if restorable:
        buffer = list(spaced)
        for pos in restorable:
            buffer[pos] = input_string[pos]
            restored_seps.add(input_string[pos])
        spaced = ''.join(buffer)

    # Strip every separator except the ones that were deliberately restored.
    strip_chars = ''.join(sep for sep in seps if sep not in restored_seps)
    collapsed = re.sub(' +', ' ', strip(spaced, strip_chars))
    return collapsed