예제 #1
0
def replace_stopwords(s: TextSeries,
                      symbol: str,
                      stopwords: Optional[Set[str]] = None) -> TextSeries:
    """
    Substitute every stopword found in the texts with `symbol`.

    When no stopword set is supplied, ``_stopwords.DEFAULT`` is used
    (NLTK's English stopwords, 179 words).

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts to process.

    symbol : str
        Character(s) put in place of each stopword.

    stopwords : Set[str], optional, default=None
        Set of stopword strings to replace. If None, the default
        NLTK English stopwords are used.

    Returns
    -------
    TextSeries
        Result of applying ``_replace_stopwords`` to each element of `s`.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> s = pd.Series("the book of the jungle")
    >>> hero.replace_stopwords(s, "X")
    0    X book X X jungle
    dtype: object

    """
    # Fall back to the library default only when the caller passed nothing.
    words_to_replace = _stopwords.DEFAULT if stopwords is None else stopwords
    replaced = s.apply(_replace_stopwords, args=(words_to_replace, symbol))
    return replaced
예제 #2
0
def too_many_uppercase(s: TextSeries) -> TextSeries:
    """
    Flag the texts of a series that have too many uppercase characters.

    The decision for each text is made by the private helper
    ``_too_many_uppercase``, which is applied element-wise.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts to inspect.

    Returns
    -------
    TextSeries
        Result of applying ``_too_many_uppercase`` to each element of `s`.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> s = pd.Series("This is NeW YoRk wIth upPer leTTers")
    >>> hero.too_many_uppercase(s)
    0    True
    dtype: object
    """
    flags = s.apply(_too_many_uppercase)
    return flags
예제 #3
0
def count_whitespaces(s: TextSeries) -> TextSeries:
    """
    Count the whitespaces contained in each text of a series.

    The per-text count is computed by the private helper
    ``_count_whitespaces``, which is applied element-wise.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts to inspect.

    Returns
    -------
    TextSeries
        Result of applying ``_count_whitespaces`` to each element of `s`.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> s = pd.Series("This is NeW YoRk wIth upPer letters")
    >>> hero.count_whitespaces(s)
    0    6
    dtype: object
    """
    counts = s.apply(_count_whitespaces)
    return counts
예제 #4
0
def count_uppercase(s: TextSeries) -> TextSeries:
    """
    Count uppercase occurrences in each text of a series.

    The texts are not modified; the exact counting rule is implemented
    by the private helper ``_count_uppercase``, which is applied
    element-wise.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts to inspect.

    Returns
    -------
    TextSeries
        Result of applying ``_count_uppercase`` to each element of `s`.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> s = pd.Series("This is NeW YoRk wIth upPer letters")
    >>> hero.count_uppercase(s)
    0    5
    dtype: object
    """
    return s.apply(_count_uppercase)
예제 #5
0
def lowercase_restricted(s: TextSeries) -> TextSeries:
    """
    Lowercase the texts of a series, except those with too many uppercase chars.

    The per-text transformation is carried out by the private helper
    ``_lowercase_restricted``, which is applied element-wise.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts to process.

    Returns
    -------
    TextSeries
        Result of applying ``_lowercase_restricted`` to each element of `s`.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> s = pd.Series("This is NeW YoRk wIth upPer leTTers")
    >>> hero.lowercase_restricted(s)
    0    this is new york with upper letters
    dtype: object
    """
    lowered = s.apply(_lowercase_restricted)
    return lowered
예제 #6
0
def place_emojis(s: TextSeries) -> TextSeries:
    """
    Put emojis back into the texts by replacing their placeholders.

    The per-text substitution is carried out by the private helper
    ``_place_emojis``, which is applied element-wise.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts containing emoji placeholders.

    Returns
    -------
    TextSeries
        Result of applying ``_place_emojis`` to each element of `s`.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> s = pd.Series("the book of the jungle :smiling_face_with_horns:")
    >>> hero.place_emojis(s)
    0    the book of the jungle 😈
    dtype: object

    """
    with_emojis = s.apply(_place_emojis)
    return with_emojis
예제 #7
0
def check_spelling(s: TextSeries) -> TextSeries:
    """
    Run the spelling check on each text of a series.

    The per-text work is delegated to the private helper
    ``_check_spelling``, which is applied element-wise.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`
        Series of texts to check.

    Returns
    -------
    TextSeries
        Result of applying ``_check_spelling`` to each element of `s`.
    """
    checked = s.apply(_check_spelling)
    return checked