Example #1
def setup_class(cls):
    meta = CollectionMetadata.from_file(meta_path)
    meta["PUMS.PUMS"].censor_dims = False
    df = pd.read_csv(csv_path)
    reader = PandasReader(df, meta)
    private_reader = PrivateReader(reader, meta, 10.0, 10E-3)  # epsilon=10.0, delta=0.01
    cls.reader = private_reader
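
For context, a reader configured this way is typically exercised as in the sketch below; the query is illustrative, and PrivateReader.execute is assumed to return rows with a header row first.

rows = private_reader.execute("SELECT COUNT(*) AS n FROM PUMS.PUMS")
print(rows)  # e.g. [['n'], [999]] -- the count is noisy and varies per run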
Example #2
def setup_class(cls):
    meta = CollectionMetadata.from_file(meta_path)
    meta["PUMS.PUMS"].censor_dims = False
    # Re-type columns so the parser treats them as numeric/boolean
    meta["PUMS.PUMS"]["sex"].type = "int"
    meta["PUMS.PUMS"]["educ"].type = "int"
    meta["PUMS.PUMS"]["married"].type = "bool"
    df = pd.read_csv(csv_path)
    reader = PandasReader(df, meta)
    private_reader = PrivateReader(reader, meta, 10.0, 10E-3)  # epsilon=10.0, delta=0.01
    cls.reader = private_reader
Example #3

def test_calculate_multiplier(self):
    pums_meta_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.yaml")
    pums_csv_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.csv")
    pums_schema = CollectionMetadata.from_file(pums_meta_path)
    pums_df = pd.read_csv(pums_csv_path)
    pums_reader = PandasReader(pums_df, pums_schema)
    query = "SELECT COUNT(*) FROM PUMS.PUMS"
    cost = PrivateReader.get_budget_multiplier(pums_schema, pums_reader, query)

    query = "SELECT AVG(age) FROM PUMS.PUMS"
    cost_avg = PrivateReader.get_budget_multiplier(pums_schema, pums_reader, query)
    assert 1 + cost == cost_avg
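
The extra unit of cost reflects how AVG is answered: SmartNoise rewrites AVG(age) as a noisy SUM(age) divided by a noisy COUNT(*), so it spends one more mechanism invocation than COUNT alone. A minimal sketch of that decomposition with a plain Laplace mechanism (a simplification; the age bounds and mechanism choice here are assumptions, not the library's exact plan):

import numpy as np

def laplace_release(value, sensitivity, epsilon):
    # Laplace mechanism: add noise with scale = sensitivity / epsilon
    return value + np.random.laplace(scale=sensitivity / epsilon)

ages = pums_df["age"].clip(0, 100)                 # assumed declared bounds
noisy_sum = laplace_release(ages.sum(), 100, 1.0)  # SUM sensitivity = range of age
noisy_count = laplace_release(len(ages), 1, 1.0)   # COUNT sensitivity = 1
print(noisy_sum / noisy_count)                     # private estimate of AVG(age)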
Example #4

def test_sklearn_query():
    sklearn_dataset = sklearn.datasets.load_iris()
    sklearn_df = pd.DataFrame(data=sklearn_dataset.data,
                              columns=sklearn_dataset.feature_names)

    iris = Table("dbo", "iris", [
        Float("sepal length (cm)", 4, 8),
        Float("sepal width (cm)", 2, 5),
        Float("petal length (cm)", 1, 7),
        Float("petal width (cm)", 0, 3)
    ], 150)
    schema = CollectionMetadata([iris], "csv")

    reader = PandasReader(sklearn_df, schema)
    rowset = execute_private_query(
        schema, reader, 0.3, 'SELECT AVG("petal width (cm)") FROM dbo.iris')
    df = pd.DataFrame(rowset[1:], columns=rowset[0])
    assert df is not None
    assert len(df) == 1
Example #5

def test_sklearn_query():
    sklearn_dataset = sklearn.datasets.load_iris()
    sklearn_df = pd.DataFrame(data=sklearn_dataset.data,
                              columns=sklearn_dataset.feature_names)

    iris = Table("dbo", "iris", [
        Float("sepal length (cm)", 4, 8),
        Float("sepal width (cm)", 2, 5),
        Float("petal length (cm)", 1, 7),
        Float("petal width (cm)", 0, 3)
    ], 150)
    schema = CollectionMetadata([iris], "csv")

    reader = PandasReader(sklearn_df, schema)
    # Call with both argument orders to check backward compatibility
    for params in ([reader, schema], [schema, reader]):
        df = execute_private_query(
            *params, 0.3, 'SELECT AVG("petal width (cm)") FROM dbo.iris')
        assert df is not None
        assert len(df) == 1
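
The declared Float bounds are what make this query answerable: SmartNoise clamps values into [lower, upper] and derives sensitivity from that range. A back-of-envelope for the noise scale on the mean, assuming a pure Laplace mechanism over the clamped sum (a simplification of the library's actual plan):

lower, upper = 0.0, 3.0        # declared bounds for "petal width (cm)"
n, epsilon = 150, 0.3          # declared row count and privacy budget
sum_sensitivity = upper - lower          # one row moves the SUM by at most 3
noise_scale = sum_sensitivity / epsilon  # Laplace scale on the noisy SUM
print(noise_scale / n)         # rough noise magnitude on the mean: ~0.0667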
Example #6
iris_dataset_path = os.path.join(root_url, "service", "datasets", "iris.csv")
if not os.path.exists(iris_dataset_path):
    sklearn_dataset = sklearn.datasets.load_iris()
    sklearn_df = pd.DataFrame(data=sklearn_dataset.data,
                              columns=sklearn_dataset.feature_names)
    sklearn_df.to_csv(iris_dataset_path)

iris_schema_path = os.path.join(root_url, "service", "datasets", "iris.yaml")
if not os.path.exists(iris_schema_path):
    iris = Table("iris", "iris", [
        Float("sepal length (cm)", 4, 8),
        Float("sepal width (cm)", 2, 5),
        Float("petal length (cm)", 1, 7),
        Float("petal width (cm)", 0, 3)
    ], 150)
    schema = CollectionMetadata([iris], "csv")
    schema.to_file(iris_schema_path, "iris")
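
Once written, the schema round-trips through CollectionMetadata.from_file; a minimal check (the "iris.iris" key assumes the "<schema>.<table>" convention used by the PUMS examples above):

reloaded = CollectionMetadata.from_file(iris_schema_path)
print(reloaded["iris.iris"])  # table metadata as parsed back from iris.yaml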


def find_ngrams(input_list, n):
    # n == 1: return the tokens unchanged; otherwise return the list of
    # n-grams as tuples of consecutive items.
    return input_list if n == 1 else list(
        zip(*[input_list[i:] for i in range(n)]))
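
A quick usage example of find_ngrams; for n >= 2 it returns tuples of consecutive items, and for n == 1 it returns the input list unchanged:

tokens = ["the", "quick", "brown", "fox"]
print(find_ngrams(tokens, 1))  # ['the', 'quick', 'brown', 'fox']
print(find_ngrams(tokens, 2))  # [('the', 'quick'), ('quick', 'brown'), ('brown', 'fox')]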


def _download_file(url, local_file):
    try:
        from urllib import urlretrieve  # Python 2
    except ImportError:
        from urllib.request import urlretrieve  # Python 3
    urlretrieve(url, local_file)
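
Usage is a single call; the URL below is illustrative only:

_download_file("https://example.com/datasets/PUMS.csv", "PUMS.csv")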
Example #7

import pandas as pd
from opendp.smartnoise.sql import PostgresReader, PrivateReader
from opendp.smartnoise.metadata import CollectionMetadata

meta = CollectionMetadata.from_file('PUMS_large.yaml')

# Alternative query kept from the original script; the reassignment below wins:
# query = 'SELECT married, AVG(income) AS income, COUNT(*) AS n FROM PUMS.PUMS_large GROUP BY married'
query = 'SELECT AVG(age) FROM PUMS.PUMS_large'

reader = PostgresReader('127.0.0.1', 'PUMS', 'postgres')
private_reader = PrivateReader(reader, meta, 1.0)

exact = reader.execute_typed(query)
print(exact)

private = private_reader.execute_typed(query)
print(private)
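
Each execution of the private query draws fresh noise, so re-running it gives a feel for the spread at epsilon = 1.0:

for _ in range(3):
    print(private_reader.execute_typed(query))  # a different noisy answer each run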
Example #8
try:
    from opendp.smartnoise.synthesizers.pytorch.nn import DPGAN, DPCTGAN, PATECTGAN
except ImportError:
    import logging
    test_logger = logging.getLogger(__name__)
    test_logger.warning("Requires torch and torchdp")

git_root_dir = subprocess.check_output(
    "git rev-parse --show-toplevel".split(" ")).decode("utf-8").strip()

meta_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.yaml")
csv_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.csv")

schema = CollectionMetadata.from_file(meta_path)
df = pd.read_csv(csv_path)


@pytest.mark.torch
class TestPytorchDPSynthesizer_DPGAN:
    def setup(self):
        self.dpgan = PytorchDPSynthesizer(DPGAN(), GeneralTransformer())

    def test_fit(self):
        self.dpgan.fit(df)
        assert self.dpgan.gan.generator

    def test_sample(self):
        self.dpgan.fit(df)
        sample_size = len(df)
Example #9
from os.path import dirname, join

from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql.parse import QueryParser

dir_name = dirname(__file__)

metadata = CollectionMetadata.from_file(join(dir_name, "Devices.yaml"))

def qp(query_string):
    return QueryParser().query(query_string)

#
#   Unit tests
#
class TestTypes:

    def test_s12(self):
        q = qp("SELECT Refurbished FROM Telemetry.Crashes;")
        q.load_symbols(metadata)
        print(str(q["Refurbished"]))
        assert q["Refurbished"].type() == "boolean"
        assert q["Refurbished"].sensitivity() == 1

    def test_s13(self):
        q = qp("SELECT * FROM Telemetry.Crashes;")
        q.load_symbols(metadata)
        assert q["Refurbished"].type() == "boolean"
        assert q["Refurbished"].sensitivity() == 1
        assert q["Temperature"].sensitivity() == 65.0
Example #10
def _load_metadata(dataset_document):
    return CollectionMetadata.from_file(
        dataset_document.dataverse_details.local_metadata_path)
Example #11
import pytest
from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql.parse import QueryParser

from os import listdir
from os.path import isfile, join, dirname

dir_name = dirname(__file__)
testpath = join(dir_name, "queries") + "/"

metadata = CollectionMetadata.from_file(join(dir_name, "TestDB.yaml"))

other_dirs = [
    f for f in listdir(testpath)
    if not isfile(join(testpath, f)) and f != "parse"
]

parse_files = [
    join(testpath + "parse/", f) for f in listdir(testpath + "parse")
    if isfile(join(testpath + "parse", f))
]
good_files = [f for f in parse_files if "_fail" not in f]
bad_files = [f for f in parse_files if "_fail" in f]

for d in other_dirs:
    other_files = [
        join(testpath + d + "/", f) for f in listdir(testpath + d)
        if isfile(join(testpath + d, f))
    ]
    good_files.extend(other_files)
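
These file lists are typically fed to parameterized tests; a hedged sketch of what that consumption might look like (the test body is illustrative, reusing only QueryParser as imported above):

@pytest.mark.parametrize("qfile", good_files)
def test_parse_good(qfile):
    with open(qfile) as f:
        for line in f:
            line = line.strip()
            if line:
                QueryParser().query(line)  # must parse without raising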