Example #1
0
def cleanup(input_string):
    """
    Replace separator characters with spaces and tidy up the result.

    Each character found in ``clean_chars`` is turned into a space, the result
    is handed to ``strip`` and any run of spaces is collapsed into one.
    (``clean_chars`` is expected not to contain ',' or ';', so those are kept.)

    :param input_string: text to clean
    :type input_string: str
    :return: the cleaned text
    :rtype: str
    """
    spaced = input_string
    for separator in clean_chars:
        spaced = spaced.replace(separator, ' ')

    trimmed = strip(spaced)
    # Replacing adjacent separators leaves runs of spaces; keep only one.
    return re.sub(' +', ' ', trimmed)
Example #2
0
def cleanup(input_string):
    """
    Blank out separator characters, strip the string and squeeze spaces.

    Every character of ``clean_chars`` occurring in the input is replaced by a
    single space; the outcome is passed through ``strip`` and consecutive
    spaces are then reduced to one.
    (``clean_chars`` is expected not to contain ',' or ';', so those are kept.)

    :param input_string: text to clean
    :type input_string: str
    :return: the cleaned text
    :rtype: str
    """
    result = input_string
    for cleanable in clean_chars:
        result = result.replace(cleanable, ' ')

    result = strip(result)
    # Collapse the space runs produced by neighbouring separators.
    result = re.sub(' +', ' ', result)
    return result
Example #3
0
def cleanup(input_string):
    """
    Replace separator characters with spaces, then strip and squeeze spaces.

    Separators that split single characters (Marvel's Agents of S.H.I.E.L.D.)
    are restored from the original string and are not stripped.
    (``clean_chars`` is expected not to contain ',' or ';', so those are kept.)

    :param input_string: text to clean
    :type input_string: str
    :return: the cleaned text
    :rtype: str
    """
    blanked = input_string
    for sep_char in clean_chars:
        blanked = blanked.replace(sep_char, ' ')

    # Positions that still hold a separator after blanking and that both
    # helper predicates accept when checked against the *original* string.
    # https://github.com/guessit-io/guessit/issues/278
    candidates = [
        pos for pos, letter in enumerate(blanked)
        if letter in seps
        and _potential_before(pos, input_string)
        and _potential_after(pos, input_string)
    ]

    # A candidate is only restored when another candidate sits two positions
    # away, i.e. the separators alternate with single characters.
    known = set(candidates)
    restored = [pos for pos in candidates
                if pos - 2 in known or pos + 2 in known]

    kept_seps = set()
    if restored:
        chars = list(blanked)
        for pos in restored:
            kept_seps.add(input_string[pos])
            chars[pos] = input_string[pos]
        blanked = ''.join(chars)

    # Restored separator characters are left out of the strip set.
    strippable = ''.join([c for c in seps if c not in kept_seps])
    return re.sub(' +', ' ', strip(blanked, strippable))
Example #4
0
def cleanup(input_string):
    """
    Blank out separator characters, strip the string and squeeze spaces.

    Separators found between single characters (Marvel's Agents of
    S.H.I.E.L.D.) are put back from the original string and are excluded from
    the characters handed to ``strip``.
    (``clean_chars`` is expected not to contain ',' or ';', so those are kept.)

    :param input_string: text to clean
    :type input_string: str
    :return: the cleaned text
    :rtype: str
    """
    spaced = input_string
    for cleanable in clean_chars:
        spaced = spaced.replace(cleanable, ' ')

    # https://github.com/guessit-io/guessit/issues/278
    sep_positions = [idx for idx, ch in enumerate(spaced) if ch in seps]

    # Keep the separator positions accepted by both helper predicates, which
    # are evaluated against the original (un-blanked) string.
    accepted = []
    for idx in sep_positions:
        if not _potential_before(idx, input_string):
            continue
        if not _potential_after(idx, input_string):
            continue
        accepted.append(idx)

    accepted_lookup = set(accepted)
    preserved = set()
    rebuilt = list(spaced)
    for idx in accepted:
        # Restore only when another accepted separator lies two positions away.
        has_neighbour = (idx - 2) in accepted_lookup or (idx + 2) in accepted_lookup
        if not has_neighbour:
            continue
        preserved.add(input_string[idx])
        rebuilt[idx] = input_string[idx]

    if preserved:
        spaced = ''.join(rebuilt)

    # Restored separator characters must survive the strip step.
    strip_chars = ''.join([c for c in seps if c not in preserved])
    spaced = strip(spaced, strip_chars)

    return re.sub(' +', ' ', spaced)