예제 #1
0
 def setup_class(cls):
     """Build a PUMS-backed private reader and store it on ``cls.reader``.

     Loads metadata from ``meta_path``, switches ``censor_dims`` off for the
     ``PUMS.PUMS`` entry, reads ``csv_path`` into a DataFrame and wraps a
     ``PandasReader`` over that frame in a ``PrivateReader``.
     """
     metadata = CollectionMetadata.from_file(meta_path)
     metadata["PUMS.PUMS"].censor_dims = False
     frame = pd.read_csv(csv_path)
     # 10.0 and 10E-3 are passed positionally; presumably the privacy
     # parameters (epsilon, delta) -- confirm against PrivateReader.
     cls.reader = PrivateReader(
         PandasReader(frame, metadata), metadata, 10.0, 10E-3)
예제 #2
0
 def setup_class(self):
     """Build a PUMS-backed private reader with overridden column types.

     Loads metadata from ``meta_path``, switches ``censor_dims`` off for the
     ``PUMS.PUMS`` entry, overrides the declared type of the ``sex``,
     ``educ`` and ``married`` columns, then stores a ``PrivateReader``
     wrapping a ``PandasReader`` over ``csv_path`` on ``self.reader``.

     NOTE(review): pytest invokes ``setup_class`` with the class object, so
     ``self`` is presumably the test class here -- confirm.
     """
     metadata = CollectionMetadata.from_file(meta_path)
     metadata["PUMS.PUMS"].censor_dims = False
     # Apply the type overrides in the same order as they were listed.
     overrides = (("sex", "int"), ("educ", "int"), ("married", "bool"))
     for column, type_name in overrides:
         metadata["PUMS.PUMS"][column].type = type_name
     frame = pd.read_csv(csv_path)
     # 10.0 and 10E-3 are passed positionally; presumably the privacy
     # parameters (epsilon, delta) -- confirm against PrivateReader.
     self.reader = PrivateReader(
         PandasReader(frame, metadata), metadata, 10.0, 10E-3)
    def test_calculate_multiplier(self):
        """Check the budget multiplier of AVG(age) against COUNT(*).

        Asserts that the multiplier reported for ``SELECT AVG(age)`` equals
        one plus the multiplier reported for ``SELECT COUNT(*)`` on the
        PUMS dataset found under ``<git_root_dir>/service/datasets``.
        """
        datasets_dir = os.path.join(git_root_dir, "service", "datasets")
        pums_metadata = CollectionMetadata.from_file(
            os.path.join(datasets_dir, "PUMS.yaml"))
        pums_frame = pd.read_csv(os.path.join(datasets_dir, "PUMS.csv"))
        source_reader = PandasReader(pums_frame, pums_metadata)

        count_cost = PrivateReader.get_budget_multiplier(
            pums_metadata, source_reader, "SELECT COUNT(*) FROM PUMS.PUMS")
        avg_cost = PrivateReader.get_budget_multiplier(
            pums_metadata, source_reader, "SELECT AVG(age) FROM PUMS.PUMS")

        assert 1 + count_cost == avg_cost
import pandas as pd
from opendp.smartnoise.sql import PostgresReader, PrivateReader
from opendp.smartnoise.metadata import CollectionMetadata

# Load the collection metadata from PUMS_large.yaml.
meta = CollectionMetadata.from_file('PUMS_large.yaml')

# NOTE(review): the first assignment is immediately overwritten by the second,
# so only the AVG(age) query is executed below.
query = 'SELECT married, AVG(income) AS income, COUNT(*) AS n FROM PUMS.PUMS_large GROUP BY married'
query = 'SELECT AVG(age) FROM PUMS.PUMS_large'

# Connection arguments are positional; presumably host, database and user --
# confirm against PostgresReader's signature.
reader = PostgresReader('127.0.0.1', 'PUMS', 'postgres')
# Wrap the Postgres reader; the third positional argument (1.0) is presumably
# the privacy budget (epsilon) -- confirm against PrivateReader's signature.
private_reader = PrivateReader(reader, meta, 1.0)

# Run the query directly through the Postgres reader and print the result.
exact = reader.execute_typed(query)
print(exact)

# Run the same query through the private reader and print the result.
private = private_reader.execute_typed(query)
print(private)
예제 #5
0
    from opendp.smartnoise.synthesizers.pytorch.nn import DPGAN, DPCTGAN, PATECTGAN

except:
    import logging
    test_logger = logging.getLogger(__name__)
    test_logger.warning("Requires torch and torchdp")

# Ask git for the top-level directory of the repository.
git_root_dir = (
    subprocess.check_output("git rev-parse --show-toplevel".split(" "))
    .decode("utf-8")
    .strip()
)

# Locations of the PUMS metadata and CSV files under service/datasets.
meta_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.yaml")
csv_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.csv")

# Metadata and data frame loaded once at import time and shared by the tests.
schema = CollectionMetadata.from_file(meta_path)
df = pd.read_csv(csv_path)


@pytest.mark.torch
class TestPytorchDPSynthesizer_DPGAN:
    def setup(self):
        """Create a fresh DPGAN-based synthesizer on ``self.dpgan``.

        NOTE(review): recent pytest releases dropped support for nose-style
        ``setup`` methods; confirm this hook still runs under the project's
        pytest version.
        """
        gan = DPGAN()
        transformer = GeneralTransformer()
        self.dpgan = PytorchDPSynthesizer(gan, transformer)

    def test_fit(self):
        """Fit the synthesizer on ``df`` and check that a generator exists."""
        synthesizer = self.dpgan
        synthesizer.fit(df)
        assert synthesizer.gan.generator

    def test_sample(self):
        self.dpgan.fit(df)
        sample_size = len(df)
예제 #6
0
from os.path import dirname, join

from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql.parse import QueryParser

# Directory containing this file; used to locate the metadata file next to it.
dir_name = dirname(__file__)

# Collection metadata loaded from Devices.yaml in the same directory.
metadata = CollectionMetadata.from_file(join(dir_name, "Devices.yaml"))

def qp(query_string):
    """Parse ``query_string`` with a new ``QueryParser`` and return the result."""
    parser = QueryParser()
    return parser.query(query_string)

#
#   Unit tests
#
class TestTypes:
    """Checks on the type and sensitivity reported after loading symbols."""

    def test_s12(self):
        """An explicitly selected ``Refurbished`` column is boolean, sensitivity 1."""
        parsed = qp("SELECT Refurbished FROM Telemetry.Crashes;")
        parsed.load_symbols(metadata)
        print(str(parsed["Refurbished"]))
        assert parsed["Refurbished"].type() == "boolean"
        assert parsed["Refurbished"].sensitivity() == 1

    def test_s13(self):
        """``SELECT *`` exposes ``Refurbished`` and ``Temperature`` by name."""
        parsed = qp("SELECT * FROM Telemetry.Crashes;")
        parsed.load_symbols(metadata)
        assert parsed["Refurbished"].type() == "boolean"
        assert parsed["Refurbished"].sensitivity() == 1
        assert parsed["Temperature"].sensitivity() == 65.0
 def _load_metadata(dataset_document):
     """Load collection metadata from the document's local metadata path.

     Reads ``dataset_document.dataverse_details.local_metadata_path`` and
     returns the result of ``CollectionMetadata.from_file`` on that path.
     """
     metadata_path = dataset_document.dataverse_details.local_metadata_path
     return CollectionMetadata.from_file(metadata_path)
예제 #8
0
import pytest
from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql.parse import QueryParser

from os import listdir
from os.path import isfile, join, dirname

# Directory of this file and the "queries" folder (with trailing slash) below it.
dir_name = dirname(__file__)
testpath = join(dir_name, "queries") + "/"

# Collection metadata loaded from TestDB.yaml next to this file.
metadata = CollectionMetadata.from_file(join(dir_name, "TestDB.yaml"))

# Every non-file entry under the queries folder except "parse".
other_dirs = [
    entry for entry in listdir(testpath)
    if entry != "parse" and not isfile(join(testpath, entry))
]

# Files directly inside queries/parse, split by whether the path contains "_fail".
parse_files = [
    join(testpath + "parse/", entry) for entry in listdir(testpath + "parse")
    if isfile(join(testpath + "parse", entry))
]
good_files = [path for path in parse_files if "_fail" not in path]
bad_files = [path for path in parse_files if "_fail" in path]

# Every file found in the remaining directories is added to good_files.
for d in other_dirs:
    other_files = [
        join(testpath + d + "/", entry) for entry in listdir(testpath + d)
        if isfile(join(testpath + d, entry))
    ]
    good_files.extend(other_files)