Ejemplo n.º 1
0
def normalize_discogs_name(original_text):
    """
    Normalize a Discogs name after dropping its bracketed numbers.

    Many Discogs names appear with a number between brackets. Its
    meaning is unclear, but it is not wanted here, so it is removed
    via ``remove_brackets_with_numbers`` before the remaining text is
    handed to ``normalize``.

    Values found in ``EMPTY_CONTENT`` are returned untouched.

    :param original_text: name as received from Discogs.
    :return: ``original_text`` itself when it is in ``EMPTY_CONTENT``,
        otherwise the result of ``normalize`` on the bracket-free text.
    """
    if original_text not in EMPTY_CONTENT:
        without_numbered_brackets = remove_brackets_with_numbers(original_text)
        return normalize(without_numbered_brackets)
    # Empty-like content is passed through as-is.
    return original_text
Ejemplo n.º 2
0
def extract_unique_normalized_ngrmas(original_str, size=3):
    """
    Extract the unique n-grams of the normalized form of a string.

    ``original_str`` is first passed through ``normalize``; the
    n-grams of length ``size`` are then obtained from that result with
    ``extract_unique_ngrams``. Per the original contract, an n-gram
    occurring two or more times appears only once in the output.

    :param original_str: text to normalize and split into n-grams.
    :param size: length of each n-gram (3 by default).
    :return: whatever ``extract_unique_ngrams`` returns for the
        normalized text (documented as a list of n-grams).
    """
    normalized_text = normalize(original_str)
    unique_ngrams = extract_unique_ngrams(normalized_text, size)
    return unique_ngrams
Ejemplo n.º 3
0
 def normalize_for_uri(original_str):
     """
     Normalize a string and replace its spaces with underscores.

     :param original_str: text to convert.
     :return: result of ``normalize(original_str)`` with every " "
         replaced by "_".
     """
     # TODO: strip out unusual characters
     normalized_text = normalize(original_str)
     return normalized_text.replace(" ", "_")