def test_to_number_boundaries_integral():
    """
    Test ToNumber on the boundary values of every integral type

    For each (numpy type, mindspore type) pair, the string form of the type's
    maximum, its minimum and zero is converted and compared with the integer
    it was generated from.
    """
    for np_type, ms_type in zip(np_integral_types, ms_integral_types):
        limits = np.iinfo(np_type)
        # Same order as before: upper bound, lower bound, then zero.
        for boundary in (limits.max, limits.min, 0):
            boundary_strings = [str(boundary)]
            pipeline = ds.GeneratorDataset(string_dataset_generator(boundary_strings), "strings")
            pipeline = pipeline.map(operations=text.ToNumber(ms_type), input_columns=["strings"])
            for row in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True):
                assert row["strings"] == int(boundary_strings[0])
Beispiel #2
0
def test_to_number_out_of_bounds_integral():
    """
    Test ToNumber with integral strings just outside each type's range

    For every integral type, a value 10 above the maximum and a value 10 below
    the minimum are pushed through ToNumber; the resulting dataset is handed to
    out_of_bounds_error_message_check for verification.
    """
    for np_type, ms_type in zip(np_integral_types, ms_integral_types):
        limits = np.iinfo(np_type)
        # Overflow case first, underflow case second.
        for out_of_range in (limits.max + 10, limits.min - 10):
            value_strings = [str(out_of_range)]
            pipeline = ds.GeneratorDataset(string_dataset_generator(value_strings), "strings")
            pipeline = pipeline.map(operations=text.ToNumber(ms_type), input_columns=["strings"])
            out_of_bounds_error_message_check(pipeline, np_type, value_strings[0])
def test_to_number_invalid_type():
    """
    Test ToNumber with mstype.bool_ as the target type

    Building the pipeline is expected to raise a TypeError whose message
    reports that Bool is not a numeric data type.
    """
    expected_message = "data_type: Bool is not numeric data type"
    with pytest.raises(TypeError) as info:
        source = ds.GeneratorDataset(string_dataset_generator(["a8fa9ds8fa"]), "strings")
        to_bool = text.ToNumber(mstype.bool_)
        source.map(operations=to_bool, input_columns=["strings"])
    assert expected_message in str(info.value)
Beispiel #4
0
def test_to_number_invalid_input():
    """
    Test ToNumber with a string that is not a number

    Iterating the pipeline is expected to raise a RuntimeError that quotes the
    offending string.
    """
    bad_value = "a8fa9ds8fa"
    pipeline = ds.GeneratorDataset(string_dataset_generator([bad_value]), "strings")
    pipeline = pipeline.map(operations=text.ToNumber(mstype.int32), input_columns=["strings"])

    with pytest.raises(RuntimeError) as info:
        # Only the failure matters, so the rows themselves are discarded.
        for _ in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
    assert f"it is invalid to convert \"{bad_value}\" to a number" in str(info.value)
Beispiel #5
0
def test_to_number_out_of_bounds_non_integral():
    """
    Test ToNumber with strings beyond the range of each non-integral type

    Values above the range and then values below it are converted with
    ms_non_integral_types[0], [1] and [2] in turn; iterating each pipeline must
    raise a RuntimeError containing the expected message.
    """
    def expect_out_of_bounds(value, type_index, expected_message):
        # Build a single-string pipeline and require that iterating it fails.
        pipeline = ds.GeneratorDataset(string_dataset_generator([value]), "strings")
        pipeline = pipeline.map(operations=text.ToNumber(ms_non_integral_types[type_index]),
                                input_columns=["strings"])
        with pytest.raises(RuntimeError) as info:
            for _ in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True):
                pass
        assert expected_message in str(info.value)

    def check_range(values):
        # values[i] is the out-of-range string paired with ms_non_integral_types[i].
        expect_out_of_bounds(values[0], 0, "outside of valid float16 range")
        expect_out_of_bounds(values[1], 1,
                             f"string input {values[1]} will be out of bounds if cast to float32")
        expect_out_of_bounds(values[2], 2,
                             f"string input {values[2]} will be out of bounds if cast to float64")

    above_range = [str(np.finfo(np.float16).max * 10), str(np.finfo(np.float32).max * 10), "1.8e+308"]
    check_range(above_range)

    below_range = [str(np.finfo(np.float16).min * 10), str(np.finfo(np.float32).min * 10), "-1.8e+308"]
    check_range(below_range)
Beispiel #6
0
def test_to_number_typical_case_integral():
    """
    Test ToNumber with ordinary integer strings for each integral type

    Each entry of ms_integral_types is paired with one list of sample strings;
    the converted column must equal the integers parsed from those strings.
    """
    samples_per_type = [["-121", "14"], ["-2219", "7623"], ["-8162536", "162371864"],
                        ["-1726483716", "98921728421"]]

    for ms_type, samples in zip(ms_integral_types, samples_per_type):
        pipeline = ds.GeneratorDataset(string_dataset_generator(samples), "strings")
        pipeline = pipeline.map(operations=text.ToNumber(ms_type), input_columns=["strings"])

        expected_output = [int(sample) for sample in samples]
        output = [row["strings"]
                  for row in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True)]

        assert output == expected_output
Beispiel #7
0
def test_to_number_typical_case_non_integral():
    """
    Test ToNumber with ordinary decimal strings for each non-integral type

    Each entry of ms_non_integral_types is paired with one list of sample
    strings; every converted value must lie within a tolerance of the float
    parsed from its source string.
    """
    samples_per_type = [["-1.1", "1.4"], ["-2219.321", "7623.453"], ["-816256.234282", "162371864.243243"]]
    # NOTE(review): the zip below stops at the two samples per type, so only the
    # first two tolerances (0.001) are ever applied, whatever the type.
    epsilons = [0.001, 0.001, 0.0001, 0.0001, 0.0000001, 0.0000001]

    for ms_type, samples in zip(ms_non_integral_types, samples_per_type):
        pipeline = ds.GeneratorDataset(string_dataset_generator(samples), "strings")
        pipeline = pipeline.map(operations=text.ToNumber(ms_type), input_columns=["strings"])

        expected_output = [float(sample) for sample in samples]
        output = [row["strings"]
                  for row in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True)]

        for expected, actual, epsilon in zip(expected_output, output, epsilons):
            assert abs(expected - actual) < epsilon
Beispiel #8
0
def test_to_number_eager():
    """
    Test ToNumber op is callable
    """
    rows = [["1", "2", "3"], ["4", "5", "6"]]
    to_int8 = text.ToNumber(mstype.int8)

    # The nested list is passed as one 2D tensor: [[1 2 3] [4 5 6]]
    converted = to_int8(rows)
    expected = np.array([[1, 2, 3], [4, 5, 6]], dtype='i')
    assert np.array_equal(converted, expected)

    # Unpacking the rows passes two 1D tensors, which is an error because
    # to_number is a OneToOne op.
    with pytest.raises(RuntimeError) as info:
        _ = to_int8(*rows)
    assert "The op is OneToOne, can only accept one tensor as input." in str(info.value)

    # Rows of unequal length are reported as invalid input.
    ragged_rows = [["1", "2", "3"], ["4", "5"]]
    with pytest.raises(TypeError) as info:
        _ = to_int8(ragged_rows)
    ragged_message = ("Invalid user input. Got <class 'list'>: [['1', '2', '3'], ['4', '5']], "
                      "cannot be converted into tensor")
    assert ragged_message in str(info.value)