def normalize_discogs_name(original_text):
    """
    Normalize a name coming from Discogs.

    Many Discogs names carry a number between brackets. Its meaning is
    not established here (NOTE(review): presumably a disambiguation
    index -- confirm), so it is dropped through
    remove_brackets_with_numbers before the text is normalized.

    :param original_text: name as it appears in the Discogs data.
    :return: original_text unchanged when it is contained in
        EMPTY_CONTENT; otherwise the result of normalize applied to the
        output of remove_brackets_with_numbers.
    """
    if original_text not in EMPTY_CONTENT:
        without_bracketed_numbers = remove_brackets_with_numbers(original_text)
        return normalize(without_bracketed_numbers)
    # Values flagged as empty content are handed back untouched.
    return original_text
def extract_unique_normalized_ngrmas(original_str, size=3):
    """
    Extract the unique n-grams (3-grams by default) of a normalized string.

    original_str is first passed through normalize; the n-grams of
    length size are then obtained from that normalized text with
    extract_unique_ngrams, so an n-gram occurring several times is
    expected to show up only once in the result.

    :param original_str: text to normalize and split into n-grams.
    :param size: length of each n-gram; defaults to 3.
    :return: whatever extract_unique_ngrams returns for the normalized
        text and the given size.
    """
    normalized_str = normalize(original_str)
    return extract_unique_ngrams(normalized_str, size)
def normalize_for_uri(original_str):
    """
    Normalize a string and replace its spaces with underscores.

    :param original_str: text to convert.
    :return: the result of normalize(original_str) with every " "
        replaced by "_".
    """
    # TODO: strip unusual characters
    normalized_str = normalize(original_str)
    return normalized_str.replace(" ", "_")