Python Ngram Examples

Programming Language: Python

Namespace/Package Name: mindspore.dataset.text

Method/Function: Ngram

Examples at hotexamples.com: 5

Python Ngram - 5 examples found. These are the top-rated real-world Python examples of mindspore.dataset.text.Ngram extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: test_ngram_op.py Project: windhooked/mindspore

def test_multiple_ngrams():
    """Test Ngram when ``n`` is a list of integers.

    Each motto is split on single spaces and mapped through
    ``Ngram([1, 2, 3], ("_", 2), ("_", 2), " ")``. Every output row must equal
    the hand-written expectation: the 1-grams, then the 2-grams, then the
    3-grams, joined with a space and using "_" where a window overhangs the
    start or end of the text.
    """
    plates_mottos = [
        "WildRose Country", "Canada's Ocean Playground", "Land of Living Skies"
    ]
    # Expected output for each motto, in the same order as plates_mottos.
    n_gram_mottos = [
        [
            'WildRose', 'Country', '_ WildRose', 'WildRose Country',
            'Country _', '_ _ WildRose', '_ WildRose Country',
            'WildRose Country _', 'Country _ _'
        ],
        [
            "Canada's", 'Ocean', 'Playground', "_ Canada's", "Canada's Ocean",
            'Ocean Playground', 'Playground _', "_ _ Canada's",
            "_ Canada's Ocean", "Canada's Ocean Playground",
            'Ocean Playground _', 'Playground _ _'
        ],
        [
            'Land', 'of', 'Living', 'Skies', '_ Land', 'Land of', 'of Living',
            'Living Skies', 'Skies _', '_ _ Land', '_ Land of',
            'Land of Living', 'of Living Skies', 'Living Skies _', 'Skies _ _'
        ],
    ]

    def gen(texts):
        # One row per motto: a 1-D bytes array of its space-separated tokens.
        for line in texts:
            yield (np.array(line.split(" "), dtype='S'), )

    dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
    dataset = dataset.map(input_columns=["text"],
                          operations=nlp.Ngram([1, 2, 3], ("_", 2), ("_", 2),
                                               " "))

    rows_checked = 0
    for i, data in enumerate(dataset.create_dict_iterator()):
        assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i]
        rows_checked = i + 1
    # Guard against a vacuous pass: the loop alone succeeds even when the
    # pipeline yields fewer rows than there are expectations (or none at all).
    assert rows_checked == len(n_gram_mottos), rows_checked

Example #2

Show file

File: test_ngram_op.py Project: zuoshou030/mindspore

    def test_config(input_line, n, l_pad=("", 0), r_pad=("", 0), sep=" "):
        """Run ``input_line`` through Ngram and report what happened.

        Returns the decoded n-grams of the first row produced by the pipeline,
        or the message of the ValueError/TypeError raised along the way.
        """
        def single_row(line):
            # The whole line becomes one row of space-separated byte tokens.
            tokens = line.split(" ")
            yield (np.array(tokens, dtype='S'),)

        try:
            source = ds.GeneratorDataset(single_row(input_line), column_names=["text"])
            ngram_op = text.Ngram(n, l_pad, r_pad, separator=sep)
            pipeline = source.map(input_columns=["text"], operations=ngram_op)
            for row in pipeline.create_dict_iterator():
                # Only the first row matters; hand it back as plain strings.
                decoded = [token.decode("utf8") for token in row["text"]]
                return decoded
        except (ValueError, TypeError) as err:
            return str(err)

Example #3

Show file

File: test_ngram_op.py Project: yrpang/mindspore

def test_ngram_callable():
    """
    Check that an Ngram op can be invoked directly on its input
    """
    bigram = text.Ngram(2, separator="-")

    # Case 1: a bytes array of tokens; splitting on " " turns the leading
    # space into an empty first token.
    tokens = np.array(" WildRose Country".split(" "), dtype='S')
    expected_tokens = ['-WildRose', 'WildRose-Country']
    assert np.array_equal(bigram(tokens), expected_tokens)

    # Case 2: a plain list of strings, each whole string acting as one token.
    sentences = ["WildRose Country", "Canada's Ocean Playground", "Land of Living Skies"]
    expected_sentences = ["WildRose Country-Canada's Ocean Playground", "Canada's Ocean Playground-Land of Living Skies"]
    assert np.array_equal(bigram(sentences), expected_sentences)

Example #4

Show file

File: test_ngram_op.py Project: windhooked/mindspore

    def test_config(input_line,
                    output_line,
                    n,
                    l_pad=None,
                    r_pad=None,
                    sep=None):
        """Run ``input_line`` through Ngram and assert the result.

        Args:
            input_line: text that is split on single spaces into tokens.
            output_line: list of strings the op is expected to produce.
            n: forwarded to ``nlp.Ngram`` as its first argument.
            l_pad: forwarded to ``nlp.Ngram`` as its second argument.
            r_pad: forwarded to ``nlp.Ngram`` as its third argument.
            sep: forwarded to ``nlp.Ngram`` as ``separator``.

        Raises:
            AssertionError: if a produced row differs from ``output_line`` or
                the pipeline produces no rows at all.
        """
        def gen(text):
            # The whole line becomes one row of space-separated byte tokens.
            yield (np.array(text.split(" "), dtype='S'), )

        dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"])
        dataset = dataset.map(input_columns=["text"],
                              operations=nlp.Ngram(n,
                                                   l_pad,
                                                   r_pad,
                                                   separator=sep))
        rows_checked = 0
        for data in dataset.create_dict_iterator():
            assert [d.decode("utf8")
                    for d in data["text"]] == output_line, output_line
            rows_checked += 1
        # The comparison lives inside the loop, so an empty pipeline would
        # otherwise let this helper succeed without checking anything.
        assert rows_checked > 0, "pipeline produced no rows"

Example #5

Show file

File: test_ngram_op.py Project: zuoshou030/mindspore

def test_simple_ngram():
    """Test Ngram with a single n value.

    Each motto is split on single spaces and mapped through
    ``Ngram(3, separator=" ")``. The expectations below list the 3-grams per
    motto; the two-word motto is expected to yield a single empty string.
    """
    plates_mottos = ["Friendly Manitoba", "Yours to Discover", "Land of Living Skies",
                     "Birthplace of the Confederation"]
    # Expected output for each motto, in the same order as plates_mottos.
    n_gram_mottos = [
        [""],
        ["Yours to Discover"],
        ['Land of Living', 'of Living Skies'],
        ['Birthplace of the', 'of the Confederation'],
    ]

    def gen(texts):
        # One row per motto: a 1-D bytes array of its space-separated tokens.
        for line in texts:
            yield (np.array(line.split(" "), dtype='S'),)

    dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"])
    dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" "))

    rows_checked = 0
    for i, data in enumerate(dataset.create_dict_iterator()):
        assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i], i
        rows_checked = i + 1
    # Guard against a vacuous pass: the loop alone succeeds even when the
    # pipeline yields fewer rows than there are expectations (or none at all).
    assert rows_checked == len(n_gram_mottos), rows_checked