예제 #1
0
def extractnumber(text, lang="en-us"):
    """Extract a number from a string with a language-specific parser.

    Args:
        text (str): the string to extract a number from
        lang (str): the code for the language text is in; it is
            lower-cased and matched on its leading two characters
    Returns:
        The result of the selected language parser, or the original
        text unchanged when no parser is available for the language.
    """
    # Wrap each parser call in a lambda so the parser name is only looked
    # up (and called) when its language is actually selected.
    parsers = {
        "en": lambda: extractnumber_en(text),
        "pt": lambda: extractnumber_pt(text),
        "it": lambda: extractnumber_it(text),
        "fr": lambda: extractnumber_fr(text),
        "sv": lambda: extractnumber_sv(text),
        "de": lambda: extractnumber_de(text),
    }

    prefix = str(lang).lower()[:2]
    parser = parsers.get(prefix)
    if parser is None:
        # TODO: extractnumber for other languages
        return text
    return parser()
예제 #2
0
파일: parse.py 프로젝트: wbwj/mycroft-core
def extract_number(text, short_scale=True, ordinals=False, lang="en-us"):
    """Extract a number from a string with a language-specific parser.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
            Only forwarded to the English parser.
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of
            1/3.  Only forwarded to the English parser.
        lang (str): the BCP-47 code for the language to use; matched
            case-insensitively on its leading two characters
    Returns:
        The result of the selected language parser, or the original text
        unchanged when the language has no parser.
    """
    prefix = str(lang).lower()[:2]

    # English is the only parser that takes the scale/ordinal options.
    if prefix == "en":
        return extractnumber_en(text, short_scale=short_scale,
                                ordinals=ordinals)
    if prefix == "pt":
        return extractnumber_pt(text)
    if prefix == "it":
        return extractnumber_it(text)
    if prefix == "fr":
        return extractnumber_fr(text)
    if prefix == "sv":
        return extractnumber_sv(text)
    if prefix == "de":
        return extractnumber_de(text)

    # TODO: extractnumber for other languages
    return text
예제 #3
0
def extract_number(text, short_scale=True, lang="en-us"):
    """Extract a number from a string with a language-specific parser.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): use short or long scale. See
            https://en.wikipedia.org/wiki/Names_of_large_numbers
            Only forwarded to the English parser.
        lang (str): the code for the language text is in; matched
            case-insensitively on its leading two characters
    Returns:
        The result of the selected language parser, or the original text
        unchanged when the language has no parser.
    """
    # Lambdas defer each parser lookup until its language is selected.
    dispatch = {
        "en": lambda: extractnumber_en(text, short_scale),
        "pt": lambda: extractnumber_pt(text),
        "it": lambda: extractnumber_it(text),
        "fr": lambda: extractnumber_fr(text),
        "sv": lambda: extractnumber_sv(text),
        "de": lambda: extractnumber_de(text),
    }

    handler = dispatch.get(str(lang).lower()[:2])
    # TODO: extractnumber for other languages
    return handler() if handler is not None else text
예제 #4
0
def extract_number(text, short_scale=True, ordinals=False, lang=None):
    """Takes in a string and extracts a number.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
            Only forwarded to the English, Italian and Dutch parsers.
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of
            1/3.  Only forwarded to the English, Italian and Dutch parsers.
        lang (str): the BCP-47 code for the language to use, None uses default
    Returns:
        (int, float or False): The number extracted or False if the input
                               text contains no numbers.  For an unsupported
                               language the problem is logged and the
                               original text is returned unchanged.
    """
    lang_code = get_primary_lang_code(lang)
    if lang_code == "en":
        return extractnumber_en(text,
                                short_scale=short_scale,
                                ordinals=ordinals)
    elif lang_code == "es":
        return extractnumber_es(text)
    elif lang_code == "pt":
        return extractnumber_pt(text)
    elif lang_code == "it":
        return extractnumber_it(text,
                                short_scale=short_scale,
                                ordinals=ordinals)
    elif lang_code == "fr":
        return extractnumber_fr(text)
    elif lang_code == "sv":
        return extractnumber_sv(text)
    elif lang_code == "de":
        return extractnumber_de(text)
    elif lang_code == "da":
        return extractnumber_da(text)
    elif lang_code == "nl":
        return extractnumber_nl(text,
                                short_scale=short_scale,
                                ordinals=ordinals)
    # TODO: extractnumber_xx for other languages
    # Report the resolved code; the previously referenced `lang_lower` was
    # never defined here and raised NameError instead of logging.
    _log_unsupported_language(
        lang_code, ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da', 'nl'])
    return text
예제 #5
0
def extract_number(text, short_scale=True, ordinals=False, lang=None):
    """Takes in a string and extracts a number.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
            Only forwarded to the English and Italian parsers.
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of
            1/3.  Only forwarded to the English and Italian parsers.
        lang (str): the BCP-47 code for the language to use, None uses default
    Returns:
        (int, float or False): The number extracted or False if the input
                               text contains no numbers.  For an unsupported
                               language the problem is logged and the
                               original text is returned unchanged.
    """
    lang_code = get_primary_lang_code(lang)
    if lang_code == "en":
        return extractnumber_en(text, short_scale=short_scale,
                                ordinals=ordinals)
    elif lang_code == "es":
        return extractnumber_es(text)
    elif lang_code == "pt":
        return extractnumber_pt(text)
    elif lang_code == "it":
        return extractnumber_it(text, short_scale=short_scale,
                                ordinals=ordinals)
    elif lang_code == "fr":
        return extractnumber_fr(text)
    elif lang_code == "sv":
        return extractnumber_sv(text)
    elif lang_code == "de":
        return extractnumber_de(text)
    elif lang_code == "da":
        return extractnumber_da(text)
    # TODO: extractnumber_xx for other languages
    # Report the resolved code; the previously referenced `lang_lower` was
    # never defined here and raised NameError instead of logging.
    _log_unsupported_language(lang_code,
                              ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da'])
    return text
예제 #6
0
def extract_number(text, short_scale=True, ordinals=False, lang="en-us"):
    """Extract a number from a string with a language-specific parser.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
            Only forwarded to the English parser.
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of
            1/3.  Only forwarded to the English parser.
        lang (str): the BCP-47 code for the language to use; matched
            case-insensitively on its leading two characters
    Returns:
        The result of the selected language parser.  When the language
        has no parser, a warning is logged and the original text is
        returned unchanged.
    """
    lang_lower = str(lang).lower()
    prefix = lang_lower[:2]

    # English is the only parser that takes the scale/ordinal options.
    if prefix == "en":
        return extractnumber_en(text, short_scale=short_scale,
                                ordinals=ordinals)
    if prefix == "es":
        return extractnumber_es(text)
    if prefix == "pt":
        return extractnumber_pt(text)
    if prefix == "it":
        return extractnumber_it(text)
    if prefix == "fr":
        return extractnumber_fr(text)
    if prefix == "sv":
        return extractnumber_sv(text)
    if prefix == "de":
        return extractnumber_de(text)

    # TODO: extractnumber_xx for other languages
    message = ('Language "{}" not recognized! Please make sure your '
               'language is one of the following: '
               'en, es, pt, it, fr, sv, de.'.format(lang_lower))
    LOG.warning(message)
    return text