def setup_class(cls):
    # Build a PrivateReader over the PUMS sample once for the whole class.
    meta = CollectionMetadata.from_file(meta_path)
    meta["PUMS.PUMS"].censor_dims = False
    df = pd.read_csv(csv_path)
    reader = PandasReader(df, meta)
    # epsilon = 10.0, delta = 10E-3
    private_reader = PrivateReader(reader, meta, 10.0, 10E-3)
    cls.reader = private_reader
def setup_class(self):
    meta = CollectionMetadata.from_file(meta_path)
    meta["PUMS.PUMS"].censor_dims = False
    # Override the inferred column types before wrapping the reader.
    meta["PUMS.PUMS"]["sex"].type = "int"
    meta["PUMS.PUMS"]["educ"].type = "int"
    meta["PUMS.PUMS"]["married"].type = "bool"
    df = pd.read_csv(csv_path)
    reader = PandasReader(df, meta)
    private_reader = PrivateReader(reader, meta, 10.0, 10E-3)
    self.reader = private_reader
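# A hypothetical follow-on test using the reader built in setup_class above;
# the query and the print are illustrative assumptions, not from the suite.
def test_count(self):
    result = self.reader.execute("SELECT COUNT(*) FROM PUMS.PUMS")
    print(result)  # noisy count; re-randomized on every execution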
def test_calculate_multiplier(self):
    pums_meta_path = os.path.join(
        git_root_dir, os.path.join("service", "datasets", "PUMS.yaml"))
    pums_csv_path = os.path.join(
        git_root_dir, os.path.join("service", "datasets", "PUMS.csv"))
    pums_schema = CollectionMetadata.from_file(pums_meta_path)
    pums_df = pd.read_csv(pums_csv_path)
    pums_reader = PandasReader(pums_df, pums_schema)
    query = "SELECT COUNT(*) FROM PUMS.PUMS"
    cost = PrivateReader.get_budget_multiplier(pums_schema, pums_reader, query)
    query = "SELECT AVG(age) FROM PUMS.PUMS"
    cost_avg = PrivateReader.get_budget_multiplier(pums_schema, pums_reader, query)
    # AVG is computed from a noisy sum and a noisy count, so it spends
    # exactly one more unit of budget than COUNT alone.
    assert 1 + cost == cost_avg
import pandas as pd

from opendp.smartnoise.sql import PostgresReader, PrivateReader
from opendp.smartnoise.metadata import CollectionMetadata

meta = CollectionMetadata.from_file('PUMS_large.yaml')

query = 'SELECT married, AVG(income) AS income, COUNT(*) AS n FROM PUMS.PUMS_large GROUP BY married'
query = 'SELECT AVG(age) FROM PUMS.PUMS_large'  # overrides the query above

reader = PostgresReader('127.0.0.1', 'PUMS', 'postgres')
private_reader = PrivateReader(reader, meta, 1.0)

exact = reader.execute_typed(query)
print(exact)

private = private_reader.execute_typed(query)
print(private)
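# Minimal local variant of the example above for readers without a Postgres
# instance: the same flow through PandasReader. The PUMS_large.csv path is
# an assumption; the calls mirror those used elsewhere in these snippets.
import pandas as pd

from opendp.smartnoise.sql import PandasReader, PrivateReader
from opendp.smartnoise.metadata import CollectionMetadata

meta = CollectionMetadata.from_file('PUMS_large.yaml')
df = pd.read_csv('PUMS_large.csv')  # hypothetical local copy of the table

reader = PandasReader(df, meta)
private_reader = PrivateReader(reader, meta, 1.0)  # epsilon = 1.0

query = 'SELECT AVG(age) FROM PUMS.PUMS_large'
print(reader.execute_typed(query))           # exact answer
print(private_reader.execute_typed(query))   # differentially private answer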
import os
import subprocess

import pandas as pd
import pytest

from opendp.smartnoise.metadata import CollectionMetadata
# The snippet begins inside a try block, so the guard is restored here;
# the two import paths below are assumed, not part of the original excerpt.
from opendp.smartnoise.synthesizers.preprocessors import GeneralTransformer
from opendp.smartnoise.synthesizers.pytorch import PytorchDPSynthesizer

try:
    from opendp.smartnoise.synthesizers.pytorch.nn import DPGAN, DPCTGAN, PATECTGAN
except ImportError:
    import logging

    test_logger = logging.getLogger(__name__)
    test_logger.warning("Requires torch and torchdp")

git_root_dir = subprocess.check_output(
    "git rev-parse --show-toplevel".split(" ")).decode("utf-8").strip()
meta_path = os.path.join(git_root_dir, os.path.join("service", "datasets", "PUMS.yaml"))
csv_path = os.path.join(git_root_dir, os.path.join("service", "datasets", "PUMS.csv"))

schema = CollectionMetadata.from_file(meta_path)
df = pd.read_csv(csv_path)


@pytest.mark.torch
class TestPytorchDPSynthesizer_DPGAN:
    def setup(self):
        self.dpgan = PytorchDPSynthesizer(DPGAN(), GeneralTransformer())

    def test_fit(self):
        self.dpgan.fit(df)
        assert self.dpgan.gan.generator

    def test_sample(self):
        self.dpgan.fit(df)
        sample_size = len(df)
from os.path import dirname, join

from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql.parse import QueryParser

dir_name = dirname(__file__)
metadata = CollectionMetadata.from_file(join(dir_name, "Devices.yaml"))


def qp(query_string):
    return QueryParser().query(query_string)


#
# Unit tests
#
class TestTypes:
    def test_s12(self):
        q = qp("SELECT Refurbished FROM Telemetry.Crashes;")
        q.load_symbols(metadata)
        print(str(q["Refurbished"]))
        assert q["Refurbished"].type() == "boolean"
        assert q["Refurbished"].sensitivity() == 1

    def test_s13(self):
        q = qp("SELECT * FROM Telemetry.Crashes;")
        q.load_symbols(metadata)
        assert q["Refurbished"].type() == "boolean"
        assert q["Refurbished"].sensitivity() == 1
        assert q["Temperature"].sensitivity() == 65.0
def _load_metadata(dataset_document):
    return CollectionMetadata.from_file(
        dataset_document.dataverse_details.local_metadata_path)
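# Hedged usage sketch for _load_metadata: the object below is a stand-in
# exposing only the attribute path the helper reads, not the service's
# real dataset document class.
from types import SimpleNamespace

dataset_document = SimpleNamespace(
    dataverse_details=SimpleNamespace(
        local_metadata_path="service/datasets/PUMS.yaml"))

metadata = _load_metadata(dataset_document)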
import pytest

from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql.parse import QueryParser

from os import listdir
from os.path import isfile, join, dirname

dir_name = dirname(__file__)
testpath = join(dir_name, "queries") + "/"

metadata = CollectionMetadata.from_file(join(dir_name, "TestDB.yaml"))

# Query files live under queries/; every directory other than parse/
# holds additional queries that are expected to parse cleanly.
other_dirs = [
    f for f in listdir(testpath)
    if not isfile(join(testpath, f)) and f != "parse"
]

parse_files = [
    join(testpath + "parse/", f)
    for f in listdir(testpath + "parse")
    if isfile(join(testpath + "parse", f))
]

good_files = [f for f in parse_files if "_fail" not in f]
bad_files = [f for f in parse_files if "_fail" in f]

for d in other_dirs:
    other_files = [
        join(testpath + d + "/", f)
        for f in listdir(testpath + d)
        if isfile(join(testpath + d, f))
    ]
    good_files.extend(other_files)
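# One plausible way to consume the lists above (a sketch, not code from the
# suite): parametrized tests asserting that good files parse and that the
# *_fail files raise. Assumes each file holds a single query.
@pytest.mark.parametrize("path", good_files)
def test_parse_good(path):
    with open(path) as f:
        QueryParser().query(f.read())  # should parse without raising


@pytest.mark.parametrize("path", bad_files)
def test_parse_bad(path):
    with open(path) as f:
        query = f.read()
    with pytest.raises(Exception):
        QueryParser().query(query)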