Python strip_accentsの例

プログラミング言語: Python

名前空間/パッケージ名: sklearn.feature_extraction.text

メソッド/関数: strip_accents

hotexamples.comのコード掲載数: 4

Python strip_accents - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsklearn.feature_extraction.text.strip_accentsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test_text.py プロジェクト: smidm/scikit-learn

def test_strip_accents():
    """Check that strip_accents maps accented characters to their base form."""
    cases = (
        # accented lower-case Latin letters (a, c, e families)
        (u'\xe0\xe1\xe2\xe3\xe4\xe5\xe7\xe8\xe9\xea\xeb', u'aaaaaaceeee'),
        # accented lower-case Latin letters (i, n, o, u, y families)
        (u'\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf9\xfa\xfb\xfc\xfd',
         u'iiiinooooouuuuy'),
        # Arabic: alef with hamza below (U+0625) -> plain alef (U+0627)
        (u'\u0625', u'\u0627'),
        # a sentence mixing accented and unaccented letters
        (u"this is \xe0 test", u'this is a test'),
    )
    for accented, plain in cases:
        assert_equal(strip_accents(accented), plain)

コード例 #2

ファイルを表示

ファイル: test_text.py プロジェクト: aravindgd/scikit-learn

def test_strip_accents():
    """Exercise strip_accents on Latin, Arabic and mixed input."""
    # Accented lower-case Latin letters collapse to their plain ASCII letter.
    assert_equal(
        strip_accents(u'\xe0\xe1\xe2\xe3\xe4\xe5\xe7\xe8\xe9\xea\xeb'),
        u'aaaaaaceeee')
    assert_equal(
        strip_accents(
            u'\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf9\xfa\xfb\xfc\xfd'),
        u'iiiinooooouuuuy')

    # Arabic alef with hamza below (U+0625) becomes a plain alef (U+0627).
    assert_equal(strip_accents(u'\u0625'), u'\u0627')

    # Unaccented characters around an accented one are left untouched.
    assert_equal(strip_accents(u"this is \xe0 test"), u'this is a test')

コード例 #3

ファイルを表示

ファイル: sutils.py プロジェクト: yz-/ut

def to_lower_ascii(d):
    """Return a copy of a DataFrame with accent-free, lower-cased text.

    Column labels, and the values of every column whose dtype is neither
    'float' nor 'int', are passed through ``strip_accents`` and lower-cased.
    Missing values in object-dtype columns are replaced by '' beforehand.

    Parameters
    ----------
    d : pandas.DataFrame
        Frame to normalise. The function works on a copy of it.

    Returns
    -------
    pandas.DataFrame
        The normalised copy.

    Raises
    ------
    NotImplementedError
        If ``d`` is not a DataFrame.
    """
    if not isinstance(d, pd.DataFrame):
        raise NotImplementedError("the input format '{}' is not handled".format(type(d)))

    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text type.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def lower_ascii(x):
        return strip_accents(x).lower()

    d = d.copy()
    # NOTE(review): convert_objects has been removed from recent pandas
    # releases; kept as-is because a replacement would change which values
    # get coerced -- confirm the pandas version this runs against.
    d = d.convert_objects(convert_dates=True, convert_numeric=True)
    # Build real lists: a lazy ``map`` object is not a valid column index.
    d.columns = [lower_ascii(name) for name in d.columns]
    for c in d.columns:
        if d[c].dtype == 'O':
            # Assign back instead of a chained in-place fillna, which is not
            # guaranteed to reach the parent frame.
            d[c] = d[c].fillna('')
        if d[c].dtype != 'float' and d[c].dtype != 'int':
            try:
                d[c] = [lower_ascii(text_type(value)) for value in d[c]]
            except TypeError as e:
                # Best effort: report the problem and leave the column as is.
                print(e)
    return d

コード例 #4

ファイルを表示

ファイル: sutils.py プロジェクト: SRHerzog/ut

def to_lower_ascii(d):
    """Return a copy of a DataFrame with accent-free, lower-cased text.

    Column labels, and the values of every column whose dtype is neither
    'float' nor 'int', are passed through ``strip_accents`` and lower-cased.
    Missing values in object-dtype columns are replaced by '' beforehand.

    Parameters
    ----------
    d : pandas.DataFrame
        Frame to normalise. The function works on a copy of it.

    Returns
    -------
    pandas.DataFrame
        The normalised copy.

    Raises
    ------
    NotImplementedError
        If ``d`` is not a DataFrame.
    """
    if not isinstance(d, pd.DataFrame):
        raise NotImplementedError(
            "the input format '{}' is not handled".format(type(d)))

    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text type.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def lower_ascii(x):
        return strip_accents(x).lower()

    d = d.copy()
    # NOTE(review): convert_objects has been removed from recent pandas
    # releases; kept as-is because a replacement would change which values
    # get coerced -- confirm the pandas version this runs against.
    d = d.convert_objects(convert_dates=True, convert_numeric=True)
    # Build real lists: a lazy ``map`` object is not a valid column index.
    d.columns = [lower_ascii(name) for name in d.columns]
    for c in d.columns:
        if d[c].dtype == 'O':
            # Assign back instead of a chained in-place fillna, which is not
            # guaranteed to reach the parent frame.
            d[c] = d[c].fillna('')
        if d[c].dtype != 'float' and d[c].dtype != 'int':
            try:
                d[c] = [lower_ascii(text_type(value)) for value in d[c]]
            except TypeError as e:
                # Best effort: report the problem and leave the column as is.
                print(e)
    return d