def test_temporarily_ignore():
    sd = StructuredDataset(df=df, label_names=['label'],
                           protected_attribute_names=['one', 'three'])
    modified = sd.copy()
    modified.labels = sd.labels + 1
    assert sd != modified
    with sd.temporarily_ignore('labels'):
        assert sd == modified
    assert 'labels' not in sd.ignore_fields
def test_copy():
    sd = StructuredDataset(df=df, label_names=['label'],
                           protected_attribute_names=['two'])
    sd2 = sd.copy()
    sd3 = sd.copy(True)

    # A shallow copy shares the underlying arrays; a deep copy does not.
    sd.features[0] = 999
    assert np.all(sd2.features[0] == 999)
    assert not np.any(sd3.features[0] == 999)
def test_split():
    sd = StructuredDataset(df=df, label_names=['label'],
                           protected_attribute_names=['two'])
    train, test = sd.split([0.5])
    train2, test2 = sd.split(2)
    assert train == train2
    assert test == test2
    assert np.all(np.concatenate((train.features, test.features))
                  == sd.features)
def test_eq():
    sd = StructuredDataset(df=df, label_names=['label'],
                           protected_attribute_names=['two'])
    sd2 = sd.copy()
    sd3 = sd.copy(True)
    sd4 = StructuredDataset(df=df, label_names=['label'],
                            protected_attribute_names=['one', 'three'])
    assert sd == sd2
    assert sd == sd3
    assert sd2 == sd3
    assert sd != sd4
def test_k_folds():
    sd = StructuredDataset(df=df, label_names=['label'],
                           protected_attribute_names=['two'])
    folds = sd.split(4)
    assert len(folds) == 4
    assert all(f.features.shape[0] == f.labels.shape[0]
               == f.protected_attributes.shape[0]
               == len(f.instance_names)
               == f.instance_weights.shape[0] == 1
               for f in folds)

    folds = sd.split(3)
    assert folds[0].features.shape[0] == 2
def enforce_dummy_coded(self, X):
    """
    Enforces that for each dummy-coded feature exactly one column is set
    to 1 and all others to 0. Called after gradient ascent.

    :param X: Feature matrix (dimension `n_instances x n_features`)
    :returns: X' (modified feature matrix)
    """
    for k, v in StructuredDataset._parse_feature_names(
            self.feature_names)[0].items():
        # Column indices of the dummy-coded columns belonging to feature k.
        ft_indices = [self.feature_names.index(k + '=' + x) for x in v]

        # Snap each row to one-hot: keep only the largest entry per group.
        max_index = np.argmax(X[:, ft_indices], axis=1)
        X[:, ft_indices] = 0
        for i in range(len(max_index)):
            X[i, ft_indices[max_index[i]]] = 1

        for x in X:
            assert x[ft_indices].sum() == 1
    return X
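# Illustrative sketch (not part of the original code): the argmax snapping
# used above, shown on a bare numpy array standing in for one hypothetical
# dummy-coded group of three columns.
def _demo_enforce_dummy_coded():
    import numpy as np
    X = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.1, 0.8]])
    one_hot = np.zeros_like(X)
    one_hot[np.arange(len(X)), X.argmax(axis=1)] = 1
    return one_hot  # [[1., 0., 0.], [0., 0., 1.]]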
def _get_domain(self, ft):
    """
    Infers the domain of a feature.

    :param ft: Feature name
    :returns: Domain
    """
    if callable(self.domains[ft]):
        return [self.domains[ft]()]
    elif self._is_dummy_coded(ft):
        # Dummy-coded features are not supported for simulation (the old
        # fallback returned the set of dummy-coded values present in the
        # dataset).
        raise Exception("Can't use dummy coded for sim")
    elif ft in self.discrete:
        # Discrete: use the set of values present in the dataset.
        return list(set(self.features[:, self._ft_index(ft)]))
    else:
        # Continuous: fall back to the observed min/max.
        df, _ = self.convert_to_dataframe()
        warnings.warn("Used min/max of feature " + ft + " to infer its "
                      "domain; other strategies are not implemented yet")
        return (min(df[ft]), max(df[ft]))
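# Illustrative sketch (not part of the original code): the discrete and
# continuous branches above reduce to a value set and a min/max pair,
# respectively. Column names and values here are made up.
def _demo_get_domain():
    import numpy as np
    import pandas as pd
    features = np.array([[0.], [1.], [1.], [2.]])
    discrete_domain = list(set(features[:, 0]))  # e.g. [0.0, 1.0, 2.0]
    df = pd.DataFrame({'amount': [0.3, 4.2, 9.7]})
    continuous_domain = (min(df['amount']), max(df['amount']))  # (0.3, 9.7)
    return discrete_domain, continuous_domain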
def _is_dummy_coded(self, ft):
    """
    :param ft: Feature name
    :returns: True if ft is dummy-coded
    """
    # _parse_feature_names returns an empty value list for features that
    # are not dummy-coded, so checking the length suffices.
    return len(
        StructuredDataset._parse_feature_names(self.feature_names)[0][ft]) > 0
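# Illustrative sketch (not part of the original code): _parse_feature_names
# groups 'name=value' columns by base name, which is what the length check
# above relies on. The feature names here are hypothetical.
def _demo_parse_feature_names():
    from aif360.datasets import StructuredDataset
    dum, nodum = StructuredDataset._parse_feature_names(
        ['age', 'color=red', 'color=blue'])
    return dict(dum), nodum  # ({'color': ['red', 'blue']}, ['age'])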
def _dedummy_code_obj(self, obj, sep='='):
    """
    :param obj: Instance (feature values) in object form (dict)
    :param sep: Separator used for dummy coding
    :returns: De-dummy-coded object
    """
    # Reimplemented here because the library version is too slow for a
    # single row.
    result_obj = obj.copy()
    for k, v in StructuredDataset._parse_feature_names(
            self.feature_names)[0].items():
        # Figure out which dummy-coded column is set to 1.
        value_l = [x for x in v if obj[k + sep + x] == 1]
        value = value_l.pop() if value_l else None

        # Convert to the non-dummy-coded representation ...
        result_obj[k] = value
        # ... and remove all dummy-coded keys, i.e. 'key=value'.
        for option in v:
            result_obj.pop(k + sep + option)
    return result_obj
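# Illustrative sketch (not part of the original code): the same de-dummy
# coding on a plain dict, with made-up keys and values.
def _demo_dedummy_code_obj():
    obj = {'age': 30, 'color=red': 1, 'color=blue': 0}
    result = dict(obj)
    result['color'] = next(
        (v for v in ['red', 'blue'] if obj['color=' + v] == 1), None)
    for v in ['red', 'blue']:
        result.pop('color=' + v)
    return result  # {'age': 30, 'color': 'red'}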
def scale_dummy_coded(self, X):
    """
    Ensures that the values of each dummy-coded feature sum to 1 (scales
    accordingly). Called during gradient ascent. An in-depth explanation
    can be found in the write-up.

    :param X: Feature matrix (dimension `n_instances x n_features`)
    :returns: X' (modified feature matrix)
    """
    for k, v in StructuredDataset._parse_feature_names(
            self.feature_names)[0].items():
        ft_indices = [self.feature_names.index(k + '=' + x) for x in v]

        # Normalize each row so the dummy-coded group sums to 1.
        X[:, ft_indices] = (X[:, ft_indices]
                            / X[:, ft_indices].sum(axis=1)[:, None])
        assert np.isclose(X[:, ft_indices].sum(axis=1).sum(), len(X))
    return X
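# Illustrative sketch (not part of the original code): the row-wise
# normalization used above, on a bare numpy array standing in for one
# hypothetical dummy-coded group.
def _demo_scale_dummy_coded():
    import numpy as np
    X = np.array([[2.0, 2.0],
                  [1.0, 3.0]])
    X = X / X.sum(axis=1)[:, None]
    return X  # [[0.5, 0.5], [0.25, 0.75]]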
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist

from aif360.datasets import StructuredDataset
from aif360.metrics import SampleDistortionMetric

data = np.arange(12).reshape((3, 4)).T
cols = ['one', 'two', 'three', 'label']
labs = np.ones((4, 1))

df = pd.DataFrame(data=np.concatenate((data, labs), axis=1), columns=cols)
sd = StructuredDataset(df=df, label_names=['label'],
                       protected_attribute_names=['one', 'three'])

distorted = data + 1
sd_distorted = sd.copy(True)
sd_distorted.features = distorted

rand = np.random.randint(0, 10, (4, 4))
rand2 = np.random.randint(0, 10, (4, 3))
df_rand = pd.DataFrame(data=rand, columns=cols)
sd_rand = StructuredDataset(df=df_rand, label_names=['label'],
                            protected_attribute_names=['one', 'three'])
sd_rand2 = sd_rand.copy(True)
sd_rand2.features = rand2

priv = [{'one': 1}]