def test_column_names(example_table: DataFrame):
    """Verify that every iris domain key appears among the table's columns.

    ``example_table`` is presumably supplied by a pytest fixture defined
    outside this block -- confirm against the surrounding test module.
    """
    measurement_columns = (
        'Sepal Length',
        'Sepal Width',
        'Petal Length',
        'Petal Width',
    )
    # Each measurement column gets its own RealDataDomain(0., 10.) instance.
    domains = {name: RealDataDomain(0., 10.) for name in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    budget = PrivacyBudget(1.0, 0.)
    table = PrivateTable(example_table, domains, budget)

    # Same membership checks as before, in the same order.
    for expected in measurement_columns + ('Class',):
        assert expected in table._columns
def example_private_table():
    """Build a PrivateTable over the Age and Education columns of the adult data.

    The data is read from ``dataset/adult_data.txt`` (a relative path), using
    the explicit column names listed below. Only ``Age`` and ``Education``
    are carried over into the frame handed to ``PrivateTable``.

    Returns:
        A ``PrivateTable`` created with ``PrivacyBudget(100000.0, 1.)``.
    """
    column_names = [
        "Age", "Workclass", "fnlwgt", "Education", "Education-Num",
        "Martial Status", "Occupation", "Relationship", "Race", "Sex",
        "Capital Gain", "Capital Loss", "Hours per week", "Country",
        "Target",
    ]
    source_path = os.path.join("dataset", "adult_data.txt")
    adult_data = pd.read_csv(source_path, names=column_names)

    # Rebuild a fresh frame from plain lists for just the two columns in use.
    selected = ('Age', 'Education')
    df = pd.DataFrame({name: adult_data[name].tolist() for name in selected})

    # Category labels keep their leading space exactly as written here.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ]
    domains = {
        'Age': RealDataDomain(17., 90.),
        'Education': CategoricalDataDomain(education_levels),
    }

    budget = PrivacyBudget(100000.0, 1.)
    return PrivateTable(df, domains, budget)
def example_private_table():
    """Build a PrivateTable over all five columns of the iris data.

    The data is read from ``dataset/iris_data.txt`` (a relative path), using
    the explicit column names listed below, and every column is copied into
    a fresh frame before being handed to ``PrivateTable``.

    Returns:
        A ``PrivateTable`` created with ``PrivacyBudget(100000.0, 1.)``.
    """
    measurement_columns = [
        "Sepal Length",
        "Sepal Width",
        "Petal Length",
        "Petal Width",
    ]
    all_columns = measurement_columns + ["Class"]

    source_path = os.path.join("dataset", "iris_data.txt")
    iris_data = pd.read_csv(source_path, names=all_columns)

    # Rebuild the frame from plain lists, column by column, in file order.
    df = pd.DataFrame({name: iris_data[name].tolist() for name in all_columns})

    # Each measurement column gets its own RealDataDomain(0., 10.) instance.
    domains = {name: RealDataDomain(0., 10.) for name in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    budget = PrivacyBudget(100000.0, 1.)
    return PrivateTable(df, domains, budget)
def test_column_names(example_table: DataFrame):
    """Verify that the 'Age' and 'Name' domain keys appear in the table's columns.

    ``example_table`` is presumably supplied by a pytest fixture defined
    outside this block -- confirm against the surrounding test module.
    """
    known_names = ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer']
    domains = {
        'Name': CategoricalDataDomain(known_names),
        'Age': RealDataDomain(0., 130.),
    }

    budget = PrivacyBudget(1.0, 0.)
    table = PrivateTable(example_table, domains, budget)

    # Same membership checks as before, in the same order.
    for expected in ('Age', 'Name'):
        assert expected in table._columns
def example_private_table():
    """Build a PrivateTable from a small in-memory Name/Age frame.

    The frame has four rows; 'Jack' appears twice among the names.

    Returns:
        A ``PrivateTable`` created with ``PrivacyBudget(100000.0, 1000.)``.
    """
    names = ['Tom', 'Jack', 'Steve', 'Jack']
    ages = [28, 34, 29, 42]
    df = pd.DataFrame({'Name': names, 'Age': ages})

    # The categorical domain lists more names than occur in the data.
    allowed_names = ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer']
    domains = {
        'Name': CategoricalDataDomain(allowed_names),
        'Age': RealDataDomain(0., 130.),
    }

    budget = PrivacyBudget(100000.0, 1000.)
    return PrivateTable(df, domains, budget)
def test_column_names(example_table: DataFrame):
    """Verify that the 'Age' and 'Education' domain keys appear in the columns.

    ``example_table`` is presumably supplied by a pytest fixture defined
    outside this block -- confirm against the surrounding test module.
    """
    # Category labels keep their leading space exactly as written here.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ]
    domains = {
        'Age': RealDataDomain(0., 130.),
        'Education': CategoricalDataDomain(education_levels),
    }

    budget = PrivacyBudget(1.0, 0.)
    table = PrivateTable(example_table, domains, budget)

    # Same membership checks as before, in the same order.
    for expected in ('Age', 'Education'):
        assert expected in table._columns
def test_real_data_domain():
    """Check ``RealDataDomain(-1., 1.).contains`` for one inside and two outside values."""
    domain = RealDataDomain(-1., 1.)

    # (probe value, expected result of ``contains``), checked in this order.
    cases = (
        (0., True),
        (-2., False),
        (3., False),
    )
    for value, expected in cases:
        # Equality comparison (not truthiness) keeps the check as strict as before.
        assert domain.contains(value) == expected