def test_column_names(example_table: DataFrame):
    """Verify that each column declared in the domains shows up in ``_columns``."""
    measurement_columns = (
        'Sepal Length',
        'Sepal Width',
        'Petal Length',
        'Petal Width',
    )
    # Each measurement column gets its own RealDataDomain(0., 10.) instance.
    domains = {name: RealDataDomain(0., 10.) for name in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    table = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    # Check the columns in declaration order so the first failure is the same.
    for column in (*measurement_columns, 'Class'):
        assert column in table._columns
Exemple #2
0
def example_private_table():
    """Build a PrivateTable over the Age and Education columns of the adult data.

    Reads ``dataset/adult_data.txt`` with explicit column names, copies the two
    columns of interest into a fresh DataFrame and wraps it together with the
    matching domains and ``PrivacyBudget(100000.0, 1.)``.
    """
    # Column labels are passed to read_csv verbatim (spelling kept as in the
    # original; only "Age" and "Education" are read back below).
    column_names = [
        "Age", "Workclass", "fnlwgt", "Education",
        "Education-Num", "Martial Status",
        "Occupation", "Relationship", "Race", "Sex",
        "Capital Gain", "Capital Loss",
        "Hours per week", "Country", "Target",
    ]
    source_path = os.path.join("dataset", "adult_data.txt")
    adult_data = pd.read_csv(source_path, names=column_names)

    selected = ('Age', 'Education')
    frame = pd.DataFrame(
        {column: adult_data[column].tolist() for column in selected})

    age_domain = RealDataDomain(17., 90.)
    # Category labels carry a leading space, exactly as listed here.
    education_domain = CategoricalDataDomain([
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ])
    domains = {'Age': age_domain, 'Education': education_domain}
    return PrivateTable(frame, domains, PrivacyBudget(100000.0, 1.))
def example_private_table():
    """Build a PrivateTable over all five columns of the iris data.

    Reads ``dataset/iris_data.txt`` with explicit column names, rebuilds the
    frame from plain lists and wraps it with one domain per column and
    ``PrivacyBudget(100000.0, 1.)``.
    """
    measurement_columns = [
        "Sepal Length", "Sepal Width", "Petal Length", "Petal Width",
    ]
    all_columns = measurement_columns + ["Class"]

    source_path = os.path.join("dataset", "iris_data.txt")
    iris_data = pd.read_csv(source_path, names=all_columns)

    # Copy every column into a new frame via plain Python lists.
    frame = pd.DataFrame(
        {column: iris_data[column].tolist() for column in all_columns})

    # Each measurement column gets its own RealDataDomain(0., 10.) instance.
    domains = {column: RealDataDomain(0., 10.) for column in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
    return PrivateTable(frame, domains, PrivacyBudget(100000.0, 1.))
Exemple #4
0
def test_column_names(example_table: DataFrame):
    """Verify that 'Age' and 'Name' are both present in the table's ``_columns``."""
    name_domain = CategoricalDataDomain(
        ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer'])
    age_domain = RealDataDomain(0., 130.)

    table = PrivateTable(
        example_table,
        {'Name': name_domain, 'Age': age_domain},
        PrivacyBudget(1.0, 0.),
    )

    # 'Age' is checked before 'Name', as in the original assertions.
    for column in ('Age', 'Name'):
        assert column in table._columns
Exemple #5
0
def example_private_table():
    """Build a PrivateTable around a small in-memory Name/Age frame.

    The frame has four rows; it is wrapped with a categorical domain for
    'Name', a real domain for 'Age' and ``PrivacyBudget(100000.0, 1000.)``.
    """
    frame = pd.DataFrame({
        'Name': ['Tom', 'Jack', 'Steve', 'Jack'],
        'Age': [28, 34, 29, 42],
    })

    # The categorical domain lists more names than occur in the frame.
    name_domain = CategoricalDataDomain(
        ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer'])
    age_domain = RealDataDomain(0., 130.)

    return PrivateTable(
        frame,
        {'Name': name_domain, 'Age': age_domain},
        PrivacyBudget(100000.0, 1000.),
    )
Exemple #6
0
def test_column_names(example_table: DataFrame):
    """Verify that the 'Age' and 'Education' domain columns appear in ``_columns``."""
    age_domain = RealDataDomain(0., 130.)
    # Category labels carry a leading space, exactly as listed here.
    education_domain = CategoricalDataDomain([
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ])

    table = PrivateTable(
        example_table,
        {'Age': age_domain, 'Education': education_domain},
        PrivacyBudget(1.0, 0.),
    )

    for column in ('Age', 'Education'):
        assert column in table._columns
Exemple #7
0
def test_real_data_domain():
    """Check ``RealDataDomain(-1., 1.).contains`` for one inside and two outside values."""
    domain = RealDataDomain(-1., 1.)
    # (value, expected result of contains), evaluated in this order.
    cases = (
        (0., True),
        (-2., False),
        (3., False),
    )
    for value, expected in cases:
        assert domain.contains(value) == expected