# Example #1
# 0
def test_engine_urandom(df, df_das_stub):
    """Test the engine run with random (urandom-seeded) results.

    Checks row counts; that the noisy tables differ from the true tables
    (so some noise was actually added) but not by much; that two
    independent engine runs produce different noise from each other; and
    that the relative error on the first column stays small.
    """
    config = ConfigParser()
    config.read_file(io.StringIO(engineurandom + confstringddict + laplacedp))

    # Create and run the first engine
    e = spark_sql_das_engine.engine(config=config,
                                    name='engine',
                                    das=df_das_stub)
    original_data = {'original_data': {'inputtable': df}}
    assert e.willRun()

    private_tables = e.run(original_data)[0]
    # NOTE(review): e.run() appears to insert 'true_tables' into
    # original_data as a side effect — confirm against the engine code.
    true_tables = original_data["true_tables"]

    # Create and run a second, independent engine on the same input
    e1 = spark_sql_das_engine.engine(config=config,
                                     name='engine',
                                     das=df_das_stub)
    original_data = {'original_data': {'inputtable': df}}
    assert e1.willRun()

    private_tables1 = e1.run(original_data)[0]

    for tname, table in private_tables.items():
        # Hoist the matrices once instead of recomputing them per assert
        noisy = table.as_matrix()
        truth = true_tables[tname].as_matrix()
        noisy1 = private_tables1[tname].as_matrix()

        # Check counts
        assert noisy.shape[0] == 4

        # Noise was added relative to the truth, but it is bounded
        dist_truth = np.abs(noisy - truth).sum()
        assert 0.1 < dist_truth < 50

        # Two independent runs draw different (but similarly bounded) noise
        dist_runs = np.abs(noisy - noisy1).sum()
        assert 0.1 < dist_runs < 50

        # Relative error on the first column is small
        assert (np.abs(noisy - truth) / noisy)[:, 0].sum() < 0.1
def test_engine_create_noisifiers():
    """ Test noisifiers creation structure and epsilon calculations"""

    config = ConfigParser()
    config.read_file(io.StringIO(variosalgos + config4engine))
    e = spark_sql_das_engine.engine(config=config)

    # Noisifiers for table 't' match the expected algorithm reprs
    expected_t = {(0, 0): algreprs[1], (0, 1): algreprs[2], (1, 0): algreprs[3]}
    for (row, col), want in expected_t.items():
        assert repr(e.noisifiers['t'][row][col]) == want

    smooth_repr = ("SmoothLaplaceAlgorithm:{'varname': 'a', 'alpha': 0.05, "
                   "'algorithm': 'SmoothLaplace', 'delta': 0.05, "
                   "'epsilon': 0.4}")
    assert repr(e.noisifiers['t2'][0][0]) == smooth_repr
    assert repr(e.noisifiers['t2'][0][1]) == algreprs[1]

    # Calculate master epsilons:
    # epsilon set in [engine] is 1.0, every variable is assigned fraction of 0.4 (a in t2 set to 0.3, but re-assigned 0.4
    # because it is composable with b)

    # in table 't': b and c are composable so get 0.4 (no sum), another 0.4 for d - 0.8 total
    assert e.table_epsilons['t'] == 0.8

    # in table 't2': a and b are composable, so 0.4
    assert e.table_epsilons['t2'] == 0.4

    # and the total is 0.8 + 0.4 = 1.2
    assert abs(e.epsilon_effective - 1.2) < 1e-5
# Example #3
# 0
def test_engine_reproducible(df, df_das_stub):
    """Test the engine run with reproducible results.

    Seeds numpy from the engine config, runs the engine, and checks row
    counts and that every noisy table matches the values recorded for
    that seed.
    """
    config = ConfigParser()
    config.read_file(io.StringIO(engineseeded + confstringddict + laplacedp))
    original_data = {'original_data': {'inputtable': df}}

    # Create the engine
    e = spark_sql_das_engine.engine(config=config,
                                    name='engine',
                                    das=df_das_stub)

    # Run the engine
    assert e.willRun()

    # Noisy values previously recorded for this fixed seed; hoisted out
    # of the loop so it is built once.
    expected = np.array(
        [[100.03334714, 1.46291515], [800.15237662, 9.10199919],
         [797.13439125, 7.51212961], [898.93010144, 10.54752546]])

    np.random.seed(e.getint("seed", default=101, section="engine"))
    private_tables = e.run(original_data)[0]
    for tname, table in private_tables.items():
        mat = table.as_matrix()
        # Check counts
        assert mat.shape[0] == 4
        # With the seed fixed, the noisy table reproduces the recorded values
        assert np.abs(mat - expected).sum() < 1e-7
# Example #4
# 0
def test_noisealg_repr(df_das_stub):
    """ Test the __repr__ and noisify functions of the NoiseAlgorithms, and their creation by engine"""

    # Make config
    config = ConfigParser()
    config.read_file(io.StringIO(variosalgos))

    # Make dummy engine
    e = spark_sql_das_engine.engine(config=config,
                                    name='engine',
                                    das=df_das_stub)

    # Make all the supported noise algorithms, one per variable a..f
    algs = [e.create_noise_algorithm_by_name("t", var) for var in "abcdef"]

    # Compare __repr__ outputs to what they are supposed to be
    for expected, alg in zip(algreprs, algs):
        assert expected == repr(alg)

    # The no-noise algorithm returns the value unchanged
    assert algs[0].noisify([{'a': 100}]) == 100

    # Test the remaining noisify functions: (algorithm index, noisify
    # arguments, expected value). The answers are calculated by looking
    # at algorithm descriptions, not the code.
    cases = [
        (1, [{'b': 100, 'ssmax': 200}, {'b': .816062}], 150),
        (2, [{'c': 100, 'ssmax': 200}, {'c': .10972505}], 1406.3203865411335),
        (3, [{'d': 100, 'ssmax': 200}, {'d': .75}], 122.10774911533412),
        (4, [{'e': 100}, {'e': .816062}], 101),
        (5, [{'f': 100, 'ssmax': 200, 'sectop': 150, 'numret': 50}], 0),
        (5, [{'f': 1000, 'ssmax': 20, 'sectop': 15, 'numret': 50}], 1000),
    ]
    for idx, args, expected in cases:
        assert abs(algs[idx].noisify(args) - expected) < 1e-3
def test_engine_nonoise(df):
    """ Test the engine run with no noise. Check counts and that true and noisy tables are identical """

    config = ConfigParser()
    config.read_file(io.StringIO(engineurandom + confstringddict + nonoise))

    # Build the engine from the no-noise config and run it
    e = spark_sql_das_engine.engine(config=config)
    original_data = {'original_data': {'inputtable': df}}

    assert e.willRun()

    private_tables = e.run(original_data)[0]
    true_tables = original_data["true_tables"]

    for tname in private_tables:
        noisy = private_tables[tname].as_matrix()
        # Check counts
        assert noisy.shape[0] == 4
        # With no noise configured, the private output equals the truth exactly
        assert (true_tables[tname].as_matrix() == noisy).all()