Exemple #1
0
import math
import os
import subprocess

import pandas as pd
from pandasql import sqldf

from opendp.whitenoise.metadata import CollectionMetadata
from opendp.whitenoise.reader.rowset import TypedRowset
from opendp.whitenoise.sql import PrivateReader, PandasReader
from opendp.whitenoise.sql.parse import QueryParser

# Ask git for the repository root so the dataset paths resolve regardless of
# the directory the tests are launched from.
git_root_dir = subprocess.check_output(
    ["git", "rev-parse", "--show-toplevel"]
).decode("utf-8").strip()

# PUMS fixtures live under <repo root>/service/datasets.
meta_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.yaml")
csv_path = os.path.join(git_root_dir, "service", "datasets", "PUMS.csv")

# Loaded once at import time; the TestQuery tests build their readers from
# these two module-level objects.
schema = CollectionMetadata.from_file(meta_path)
df = pd.read_csv(csv_path)

#   Unit tests
#
class TestQuery:
    def test_count_exact(self):
        """Check that an exact COUNT(*) over PUMS.PUMS evaluates to 1000."""
        exact_reader = PandasReader(schema, df)
        rows = exact_reader.execute("SELECT COUNT(*) AS c FROM PUMS.PUMS")
        # The value is read from index 1; row 0 is presumably the column-name
        # header -- confirm against the reader's return format.
        count = rows[1][0]
        assert count == 1000
    def test_empty_result(self):
        """Check the rowset length for a query expected to match no rows."""
        exact_reader = PandasReader(schema, df)
        rows = exact_reader.execute(
            "SELECT age as a FROM PUMS.PUMS WHERE age > 100"
        )
        # Presumably the single remaining row is the column-name header --
        # confirm against the reader's return format.
        assert len(rows) == 1
    def test_empty_result_typed(self):
        # NOTE(review): only the reader construction is visible here; no query
        # or assertion follows, so the test body looks truncated. Confirm
        # against the original test before relying on it.
        reader = PandasReader(schema, df)
import pandas as pd
from opendp.whitenoise.sql import PandasReader, PrivateReader
from opendp.whitenoise.metadata import CollectionMetadata

# Load the PUMS sample data and its metadata from the working directory.
pums = pd.read_csv('PUMS.csv')
meta = CollectionMetadata.from_file('PUMS.yaml')

# Two example queries. They are kept under distinct names so that neither is
# silently clobbered by a later assignment to `query`.
GROUPED_INCOME_QUERY = (
    'SELECT married, AVG(income) AS income, COUNT(*) AS n '
    'FROM PUMS.PUMS GROUP BY married'
)
FILTERED_COUNT_QUERY = (
    'SELECT COUNT(*) AS n, COUNT(pid) AS foo '
    'FROM PUMS.PUMS WHERE age > 80 GROUP BY educ'
)

# The query that is actually executed below; switch the name to run the other.
query = FILTERED_COUNT_QUERY

reader = PandasReader(meta, pums)
# NOTE(review): the third positional argument (4.0) is presumably the privacy
# budget (epsilon) -- confirm against the PrivateReader signature.
private_reader = PrivateReader(meta, reader, 4.0)
private_reader.options.censor_dims = True
private_reader.options.clamp_counts = True

# Run the same query through the plain reader and the private reader and
# print both results for comparison.
exact = reader.execute_typed(query)
print(exact)

private = private_reader.execute_typed(query)
print(private)
# Every regular file found directly inside the "validate" directory.
validate_files = [
    join(testpath + "validate/", entry)
    for entry in listdir(testpath + "validate")
    if isfile(join(testpath + "validate", entry))
]

# Split on the "_fail" marker in the path: paths without it go to good_files,
# paths with it go to bad_files.
good_files = [path for path in validate_files if "_fail" not in path]
bad_files = [path for path in validate_files if "_fail" in path]

# Every regular file from the other directories is appended to good_files.
for d in other_dirs:
    other_files = [
        join(testpath + d + "/", entry)
        for entry in listdir(testpath + d)
        if isfile(join(testpath + d, entry))
    ]
    good_files += other_files

metadata = CollectionMetadata.from_file(
    join(dir_name, "Devices.yaml")
)


#
#   Unit tests
#
class TestValidate:
    def test_all_good_queries(self):
        """Run GoodQueryTester validation for each path in good_files."""
        for path in good_files:
            # Print the path first so a failure can be traced to its file.
            print(path)
            GoodQueryTester(path).runValidate()

    def test_all_bad_queries(self):
        """Run BadQueryTester validation for each path in bad_files."""
        for badpath in bad_files:
            bqt = BadQueryTester(badpath)
            # Previously the tester was constructed and then discarded, so the
            # test never exercised any query. Run it, mirroring the good-query
            # test. NOTE(review): assumes BadQueryTester exposes runValidate()
            # like GoodQueryTester -- confirm.
            bqt.runValidate()
 def _load_metadata(dataset_document):
     """Return CollectionMetadata read from the document's local metadata path.

     The path is taken from
     ``dataset_document.dataverse_details.local_metadata_path``.
     """
     metadata_path = dataset_document.dataverse_details.local_metadata_path
     return CollectionMetadata.from_file(metadata_path)