Beispiel #1
0
def test_tuple_node_works():
    """Check that a Missing filter attached to a Tuple node yields NaNs.

    The filter is run with ``prob`` set to 1 and ``m_val`` set to NaN, and
    the test expects all four entries of the 2x2 input to come back as NaN.
    """
    pairs = np.array([(0., 1.), (2., 3.)])

    tuple_node = Tuple()
    tuple_node.addfilter(Missing("prob", "m_val"))
    root = Series(tuple_node)

    corrupted = root.generate_error(pairs, {'prob': 1, 'm_val': np.nan})

    # Visit the entries in row-major order: (0, 0), (0, 1), (1, 0), (1, 1).
    assert all(
        np.isnan(corrupted[row][col])
        for row in range(2)
        for col in range(2))
Beispiel #2
0
def test_series_and_array_work_with_regular_arrays():
    """Check that a plain Python list can be fed through Series/Array.

    A one-element list is passed instead of a NumPy array; with ``prob``
    set to 1 the test expects that element to come back as NaN.
    """
    array_node = Array()
    array_node.addfilter(Missing("prob", "m_val"))
    root = Series(array_node)

    result = root.generate_error([0.], {"prob": 1., "m_val": np.nan})

    assert np.isnan(result[0])
Beispiel #3
0
def test_seed_determines_result_for_time_dependent_gaussian_noise():
    """Check that equal seeds give equal time-dependent Gaussian noise.

    The same 5x5 float input is processed twice, each time with a fresh
    ``RandomState`` seeded with 42, and the two outputs are compared with
    ``np.allclose``.
    """
    grid = np.arange(25).reshape((5, 5)).astype(np.float64)
    noise_params = {
        'mean': 2.,
        'std': 3.,
        'mean_inc': 1.,
        'std_inc': 4.,
    }

    array_node = Array()
    array_node.addfilter(
        GaussianNoiseTimeDependent('mean', 'std', 'mean_inc', 'std_inc'))
    root = Series(array_node, dim_name="time")

    # Two independent runs, each with its own identically seeded generator.
    first, second = [
        root.generate_error(grid, noise_params,
                            np.random.RandomState(seed=42))
        for _ in range(2)
    ]

    assert np.allclose(first, second)
Beispiel #4
0
def main():
    """Rotate a few MNIST digits and display the first rotated image.

    Usage: python run_rotate_MNIST_example <angle>

    <angle> is read from the first command-line argument, converted to a
    float and handed to the Rotation filter under the "angle" parameter
    (e.g. 90 to rotate by pi / 2).
    """
    digits, _, _, _ = load_mnist()
    xs = digits[:20]  # only the first 20 samples are processed
    angle = float(sys.argv[1])
    print(f"x subset shape: {xs.shape}")

    # Each sample is handled by an Array node created with reshape=(28, 28).
    img_node = Array(reshape=(28, 28))
    root_node = Series(img_node)
    img_node.addfilter(Rotation("angle"))

    params = {'angle': angle}
    result = root_node.generate_error(xs, params)

    first_image = result[0].reshape((28, 28))
    plt.matshow(first_image)
    plt.show()
Beispiel #5
0
def test_visualizing_series_and_array_nodes():
    """Check the visualisation output for a Series node wrapping an Array.

    ``plotting_utils.visualize_error_generator`` returns a path; the file at
    that path is read as text and must contain a line matching
    ``1.*Series`` as well as the text ``1 -> 2``.
    """
    x_node = Array()
    series_node = Series(x_node)
    # NOTE(review): the meaning of the second argument (False) is not visible
    # here - confirm against plotting_utils.
    path = plotting_utils.visualize_error_generator(series_node, False)

    # The context manager closes the handle even if read() raises; the
    # original left the file open.
    with open(path, 'r') as handle:
        data = handle.read()

    assert re.search(r'1.*Series', data)
    assert re.search(r'1 -> 2', data)
Beispiel #6
0
def test_apply_with_probability():
    """Check that ApplyWithProbability alters only part of the data.

    Ten identical ``"a"`` rows are processed with an OCR error wrapped in
    ApplyWithProbability under ``ocr_prob`` = 0.5 and a fixed seed; the test
    expects at least two rows of the output to differ from each other.
    """
    rows = np.array([["a"]] * 10)

    ocr_filter = OCRError("ps", "p")
    array_node = Array()
    array_node.addfilter(ApplyWithProbability(ocr_filter, "ocr_prob"))
    root = Series(array_node)

    settings = {"ps": {"a": [["e"], [1.0]]}, "p": 1.0, "ocr_prob": 0.5}
    out = root.generate_error(rows, settings,
                              np.random.RandomState(seed=42))

    # Compare every row with every row; one unequal pair is enough.
    assert any(left != right for left in out for right in out)
Beispiel #7
0
def get_err_root_node():
    """Build and return the error-generation tree used by this example.

    The tree is a Series node wrapping a single Array node. Only the
    Resolution filter (parameter name "k") is attached; the commented-out
    lines list other filters that can be enabled instead.
    """
    array_node = Array()
    root = Series(array_node)

    # Alternative filters, currently disabled:
    # array_node.addfilter(GaussianNoise("mean", "std"))
    # array_node.addfilter(Blur_Gaussian("std"))
    # array_node.addfilter(Snow("snowflake_probability", "snowflake_alpha", "snowstorm_alpha"))
    # array_node.addfilter(FastRain("probability", "range"))
    # array_node.addfilter(StainArea("probability", "radius_generator", "transparency_percentage"))
    # array_node.addfilter(JPEG_Compression("quality"))
    # array_node.addfilter(Brightness("tar", "rat", "range"))
    # array_node.addfilter(Identity())

    # Active filter:
    array_node.addfilter(Resolution("k"))

    return root
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
from dpemu.nodes import Array, Series
from dpemu.filters.text import OCRError
from dpemu.filters.common import ApplyWithProbability

# Ten identical single-character rows to run the OCR error on.
data = np.array([["a"]] * 10)

# Replacement table passed to the OCR filter under "ocr_params".
params = {"a": [["e"], [1.0]]}
ocr = OCRError("ocr_params", "ocr_p")

# Wrap the OCR filter so that it is applied with the probability given
# under the "p" parameter.
x_node = Array()
x_node.addfilter(ApplyWithProbability(ocr, 'p'))
root_node = Series(x_node)

out = root_node.generate_error(
    data,
    {'ocr_params': params, 'ocr_p': 1.0, 'p': 0.5},
)

print(out)
print("output shape:", out.shape, ", output dtype:", out.dtype)
Beispiel #9
0
# Real data could be loaded from a file instead, e.g.:
# data = np.genfromtxt(datafile, delimiter=',')

# Scenario: 10 sensors, each providing 100 data points
# (one data point per day, say).
observations = 100
sensors = 10

# Random matrix standing in for the sensor readings.
data = np.random.randn(observations, sensors)

# An Array node models the battery of 10 sensors.
sensor_array = Array()

# Attach a Missing filter that randomly turns elements into NaN
# (NaN = "not a number", i.e. missing or invalid data).
sensor_array.addfilter(Missing("prob", "val"))

# A Series node models the sequence of 100 data points.
root_node = Series(sensor_array)

# The tree is complete; run the data through it to introduce errors.
error_params = {'prob': .3, 'val': np.nan}
output = root_node.generate_error(data, error_params)

# Sanity check: the output shape should equal the input shape.
print("input data has shape", data.shape)
print("output data has shape", output.shape)

# The fraction of NaNs should be close to the probability passed to the
# Missing filter.
print("relative frequency of NaNs:", np.isnan(output).sum() / output.size)