예제 #1
0
def tag(string):
    """Tag a string with the first segmenting label whose regex matches.

    The input is passed through ``remove_accents`` and the result is
    matched against the compiled patterns of each label, visiting labels
    in the order given by ``regex_segmenting_order``.

    Parameters
    ----------
    string : str
        Text to tag.

    Returns
    -------
    tuple
        ``([(string, label)], True)`` for the first label with a matching
        pattern, or ``(string, False)`` when nothing matches. When that
        first match belongs to the HELLO, GREETINGS or THANKS label and
        any pattern of ``regex_flags_dict`` also matches
        (case-insensitively), ``(string, False)`` is returned immediately
        and no further labels are tried.
    """
    normalized = remove_accents(string)
    for label in regex_segmenting_order:
        for label_pattern in compiled_regex_segmenting_dict[label]:
            if not label_pattern.search(normalized):
                continue
            if label in ["HELLO", "GREETINGS", "THANKS"]:
                # Flag words mean the sentence carries body information,
                # so it must not be tagged as a plain greeting/thanks.
                has_body_flag = any(
                    re.search(pattern=flag_regex,
                              string=normalized,
                              flags=re.IGNORECASE)
                    for flag_regex, _ in regex_flags_dict.items()
                )
                if has_body_flag:
                    return string, False
            return [(string, label)], True

    return string, False
예제 #2
0
def tag(string):
    """Tag a string with the first label whose compiled regex matches.

    The input is passed through ``remove_accents`` and the result is
    searched with every compiled pattern of
    ``compiled_regex_segmenting_dict``, label by label in the dict's
    iteration order.

    Parameters
    ----------
    string : str
        Text to tag.

    Returns
    -------
    tuple
        ``([(string, label)], True)`` for the first label that has a
        matching pattern, or ``(string, False)`` when nothing matches.
    """
    labelled_patterns = compiled_regex_segmenting_dict.items()
    normalized = remove_accents(string)
    for label, patterns in labelled_patterns:
        # any() stops at the first hit, like an early return from a loop.
        if any(pattern.search(normalized) for pattern in patterns):
            return [(string, label)], True

    return string, False
예제 #3
0
def tag(string):
    """Tag a string with the first label whose regex matches.

    The input is passed through ``remove_accents``. Each raw pattern of
    ``regex_segmenting_dict`` has its spaces replaced by ``regex_tag``
    and is then searched case-insensitively in the result, label by
    label in the dict's iteration order.

    Parameters
    ----------
    string : str
        Text to tag.

    Returns
    -------
    tuple
        ``([(string, label)], True)`` for the first label that has a
        matching pattern, or ``(string, False)`` when nothing matches.
    """
    labelled_patterns = regex_segmenting_dict.items()
    normalized = remove_accents(string)
    for label, raw_patterns in labelled_patterns:
        # any() stops at the first hit, like an early return from a loop.
        if any(
            re.search(raw.replace(" ", regex_tag), normalized, re.I)
            for raw in raw_patterns
        ):
            return [(string, label)], True

    return string, False
예제 #4
0
def test_remove_accents():
    """Check that ``remove_accents`` maps "éèëêàù" to "eeeeau"."""
    accented = "éèëêàù"
    np.testing.assert_string_equal(remove_accents(accented), "eeeeau")