def test_tuple_node_works():
    """A Missing filter with probability 1 on a Tuple node turns every cell into NaN."""
    pairs = np.array([(0., 1.), (2., 3.)])

    tuple_node = Tuple()
    tuple_node.addfilter(Missing("prob", "m_val"))
    root = Series(tuple_node)

    pairs = root.generate_error(pairs, {'prob': 1, 'm_val': np.nan})

    # Check each of the four cells individually, row by row.
    assert all(np.isnan(pairs[row][col]) for row in range(2) for col in range(2))
def test_series_and_array_work_with_regular_arrays():
    """Series/Array nodes accept a plain Python list as input data."""
    plain_list = [0.]

    array_node = Array()
    array_node.addfilter(Missing("prob", "m_val"))
    root = Series(array_node)

    result = root.generate_error(plain_list, {"prob": 1., "m_val": np.nan})

    # With probability 1 the single element must have been replaced by NaN.
    assert np.isnan(result[0])
def test_seed_determines_result_for_time_dependent_gaussian_noise():
    """Two runs seeded identically must produce numerically equal output."""
    grid = np.arange(25, dtype=np.float64).reshape(5, 5)
    noise_params = {
        'mean': 2.,
        'std': 3.,
        'mean_inc': 1.,
        'std_inc': 4.,
    }

    array_node = Array()
    array_node.addfilter(
        GaussianNoiseTimeDependent('mean', 'std', 'mean_inc', 'std_inc'))
    root = Series(array_node, dim_name="time")

    # Each run gets its own freshly seeded generator.
    first = root.generate_error(grid, noise_params, np.random.RandomState(seed=42))
    second = root.generate_error(grid, noise_params, np.random.RandomState(seed=42))

    assert np.allclose(first, second)
def main():
    """An example that rotates MNIST digits and displays one.

    Usage: python run_rotate_MNIST_example <angle>

    where <angle> is the angle of rotation
    (e.g. 90 to rotate by pi / 2)

    Raises:
        SystemExit: if the <angle> argument is missing or is not a number.
    """
    usage = "Usage: python run_rotate_MNIST_example <angle>"

    # Validate the command line first so that a bad invocation fails fast,
    # before the dataset is loaded.
    if len(sys.argv) < 2:
        raise SystemExit(usage)
    try:
        angle = float(sys.argv[1])
    except ValueError:
        raise SystemExit(f"<angle> must be a number, got {sys.argv[1]!r}\n{usage}")

    x, _, _, _ = load_mnist()
    xs = x[:20]  # small subset of x
    print(f"x subset shape: {xs.shape}")

    # Each element of the series is reshaped to 28x28 before the filter runs.
    img_node = Array(reshape=(28, 28))
    root_node = Series(img_node)
    img_node.addfilter(Rotation("angle"))
    result = root_node.generate_error(xs, {'angle': angle})

    plt.matshow(result[0].reshape((28, 28)))
    plt.show()
def test_visualizing_series_and_array_nodes():
    """The visualisation file for a Series -> Array tree names the Series node and its edge."""
    x_node = Array()
    series_node = Series(x_node)

    path = plotting_utils.visualize_error_generator(series_node, False)

    # Read inside a context manager so the file handle is always closed.
    with open(path, 'r') as graph_file:
        contents = graph_file.read()

    assert re.search(r'1.*Series', contents)
    assert re.search(r'1 -> 2', contents)
def test_apply_with_probability():
    """With ocr_prob = 0.5 only some rows are altered, so the output is not uniform."""
    rows = np.array([["a"]] * 10)

    ocr_filter = OCRError("ps", "p")
    array_node = Array()
    array_node.addfilter(ApplyWithProbability(ocr_filter, "ocr_prob"))
    root = Series(array_node)

    run_params = {"ps": {"a": [["e"], [1.0]]}, "p": 1.0, "ocr_prob": 0.5}
    result = root.generate_error(rows, run_params, np.random.RandomState(seed=42))

    # At least one pair of rows must differ.
    assert any(left != right for left in result for right in result)
def get_err_root_node():
    """Build and return the error-generation tree: a Series of Arrays with a Resolution filter.

    Only the Resolution filter (parameter name "k") is active. The other
    filters below are kept as ready-made alternatives that can be enabled
    by uncommenting them:

        GaussianNoise("mean", "std")
        Blur_Gaussian("std")
        Snow("snowflake_probability", "snowflake_alpha", "snowstorm_alpha")
        FastRain("probability", "range")
        StainArea("probability", "radius_generator", "transparency_percentage")
        JPEG_Compression("quality")
        Brightness("tar", "rat", "range")
        Identity()
    """
    array_node = Array()
    root = Series(array_node)
    array_node.addfilter(Resolution("k"))
    return root
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
from dpemu.nodes import Array, Series
from dpemu.filters.text import OCRError
from dpemu.filters.common import ApplyWithProbability

# Example: apply an OCR error filter to each element with probability 'p'.

# Ten single-character rows, all "a".
data = np.array([["a"]] * 10)

# Replacement table handed to OCRError under the 'ocr_params' key.
params = {"a": [["e"], [1.0]]}

# Data model tree: a Series of Arrays whose OCR filter is wrapped so that
# it only fires with the probability given by parameter 'p'.
ocr = OCRError("ocr_params", "ocr_p")
x_node = Array()
x_node.addfilter(ApplyWithProbability(ocr, 'p'))
root_node = Series(x_node)

error_params = {'ocr_params': params, 'ocr_p': 1.0, 'p': 0.5}
out = root_node.generate_error(data, error_params)

print(out)
print("output shape:", out.shape, ", output dtype:", out.dtype)
# data = np.genfromtxt(datafile, delimiter=',')

# Suppose there are 10 sensors, each contributing 100 data points
# (one data point per day, for instance).
observations = 100
sensors = 10

# Random matrix used as the input data.
data = np.random.randn(observations, sensors)

# An Array node stands for the battery of 10 sensors.
sensor_array = Array()

# The Missing filter replaces elements with the value passed as "val"
# (NaN below, i.e. "not a number": missing or invalid data), with the
# probability passed as "prob".
sensor_array.addfilter(Missing("prob", "val"))

# A Series node stands for the 100 data points; it is the root of the
# data model tree, which is now complete.
root_node = Series(sensor_array)

# Run the data through the tree to introduce the errors.
error_params = {'prob': .3, 'val': np.nan}
output = root_node.generate_error(data, error_params)

# Sanity check: the output should have the same shape as the input.
print("input data has shape", data.shape)
print("output data has shape", output.shape)

# The relative frequency of NaNs should be close to the probability
# given to the Missing filter.
nan_count = np.isnan(output).sum()
print("relative frequency of NaNs:", nan_count / output.size)