def test_query_federate_data():
    """Checks that querying a federated array with unprotected access yields the source values in order."""
    source_values = np.random.rand(30)
    federated_array = shfl.private.federated_operation.federate_array(source_values, 30)
    federated_array.configure_data_access(UnprotectedAccess())
    answer = federated_array.query()
    # Compare element by element, following the order of the query result.
    for position, value in enumerate(answer):
        assert value == source_values[position]
def test_get_federated_data():
    """
    Checks the federated split produced by NonIidDataDistribution.

    Every sample queried from the nodes must be a row of the train set, and the returned
    test data and labels must equal the ones stored in the underlying database.
    """
    database = TestDataBase()
    database.load_data()
    distribution = NonIidDataDistribution(database)

    # Identifier and number of nodes are covered by the private tests.
    # Percent and weight are covered by the iid and non-iid tests.
    num_nodes = 4
    federated_data, test_data, test_label = distribution.get_federated_data(num_nodes)

    federated_data.configure_data_access(UnprotectedAccess())
    node_samples = []
    node_labels = []
    for node_index in range(federated_data.num_nodes()):
        node_samples.append(federated_data[node_index].query().data)
        node_labels.append(federated_data[node_index].query().label)

    node_samples = np.array(node_samples)
    train_x, train_y = distribution._database.train

    train_y = tf.keras.utils.to_categorical(train_y)

    # Position inside the train set of every sample held by a node, in visiting order.
    matched_rows = []
    for samples in node_samples:
        labels_node = []
        for sample in samples:
            assert sample in train_x
            row = np.where((sample == train_x).all(axis=1))[0][0]
            matched_rows.append(row)
            labels_node.append(train_y[row].argmax(axis=-1))

    federated_values = np.concatenate(node_samples).ravel()
    assert np.array_equal(train_x[matched_rows, ].ravel(), federated_values)
    assert np.array_equal(test_data.ravel(), distribution._database.test[0].ravel())
    assert np.array_equal(test_label, distribution._database.test[1])
def test_federated_data_identifier():
    """Checks that querying a node with the identifier "bad_identifier_federated_data" raises ValueError."""
    federated_data = FederatedData()
    federated_data.add_data_node(np.random.rand(10))
    federated_data.configure_data_access(UnprotectedAccess())

    wrong_identifier = "bad_identifier_federated_data"
    with pytest.raises(ValueError):
        federated_data[0].query(wrong_identifier)
Example #4
0
def test_query_private_data():
    """Checks that data stored in a DataNode is returned unchanged once unprotected access is configured."""
    key = "random_array"
    expected = np.random.rand(30)

    data_node = DataNode()
    data_node.set_private_data(key, expected)
    data_node.configure_data_access(key, UnprotectedAccess())

    queried = data_node.query(key)
    for position, value in enumerate(expected):
        assert queried[position] == value
def test_federated_data():
    """Checks node counting and querying of a FederatedData holding a single node."""
    federated_data = FederatedData()
    # A fresh container holds no nodes.
    assert federated_data.num_nodes() == 0

    values = np.random.rand(10)
    federated_data.add_data_node(values)
    federated_data.configure_data_access(UnprotectedAccess())
    assert federated_data.num_nodes() == 1

    first_node_data = federated_data[0].query()
    assert first_node_data[0] == values[0]
def test_federate_transformation():
    """Checks that, after applying TestTransformation, each node holds its original value plus one."""
    base_values = np.random.rand(30)
    federated_array = shfl.private.federated_operation.federate_array(base_values, 30)
    federated_array.configure_data_access(UnprotectedAccess())
    shfl.private.federated_operation.apply_federated_transformation(federated_array, TestTransformation())

    for position, data_node in enumerate(federated_array):
        assert data_node.query() == base_values[position] + 1
def test_federate_array_size_private_data():
    """Checks that federating an array gives every node data_size / num_clients elements."""
    data_size = 10000
    num_clients = 10
    array = np.random.rand(data_size)

    federated_array = shfl.private.federated_operation.federate_array(array, num_clients)
    federated_array.configure_data_access(UnprotectedAccess())

    expected_node_size = data_size / num_clients
    for data_node in federated_array:
        assert len(data_node.query()) == expected_node_size

    # The first node must start with the first element of the source array.
    first_node_data = federated_array[0].query()
    assert first_node_data[0] == array[0]
Example #8
0
def test_query_model_params():
    """Checks that query_model_params returns the parameters reported by the node's model."""
    expected_params = np.random.rand(30)
    data_node = DataNode()

    # Stand-in model whose get_model_params() returns the random array.
    model_mock = Mock()
    model_mock.get_model_params.return_value = expected_params
    data_node.model = model_mock
    data_node.configure_model_params_access(UnprotectedAccess())

    model_params = data_node.query_model_params()
    for position, expected in enumerate(expected_params):
        assert model_params[position] == expected
def test_shuffle_node():
    """Checks that the labels queried from a node differ from the original ones after applying ShuffleNode."""
    features = np.random.rand(50).reshape([10, 5])
    original_labels = np.random.randint(0, 10, 10)

    federated_data = FederatedData()
    federated_data.add_data_node(LabeledData(features, original_labels))
    for node in federated_data:
        node.apply_data_transformation(ShuffleNode())

    federated_data.configure_data_access(UnprotectedAccess())
    queried_labels = federated_data[0].query().label
    assert not np.array_equal(queried_labels, original_labels)
def test_federated_government_private_data():
    """
    Checks the objects held by a TestFederatedGovernment after construction: every node model and
    the global model are instances of the model builder, and the aggregator keeps the same id.
    """
    model_builder = Mock
    aggregator = Mock()

    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)
    federated_data, test_data, test_labels = distribution.get_federated_data(3)

    government = TestFederatedGovernment(
        model_builder, federated_data, aggregator, UnprotectedAccess())

    for node in government._federated_data:
        assert isinstance(node._model, model_builder)

    assert isinstance(government.global_model, model_builder)
    assert aggregator.id == government._aggregator.id
def test_split_train_test_pandas():
    """
    Runs the federated split_train_test operation over nodes built from pandas objects and compares
    each node's private train and test data with the result of splitting the node directly.
    """
    num_nodes = 10
    features = pd.DataFrame(np.random.rand(10, num_nodes))
    labels = pd.Series(np.random.randint(range(0, 10), num_nodes))

    federated_data = FederatedData()
    for column in range(num_nodes):
        node_data = LabeledData(features[column], to_categorical(labels[column]))
        federated_data.add_data_node(node_data)

    federated_data.configure_data_access(UnprotectedAccess())
    # NOTE(review): this is a plain assignment, so both names refer to the same FederatedData
    # object and the loop below pairs every node with itself - confirm whether a copy was intended.
    raw_federated_data = federated_data

    shfl.private.federated_operation.split_train_test(federated_data)

    for raw_node, split_node in zip(raw_federated_data, federated_data):
        raw_node.split_train_test()
        assert raw_node.private_data == split_node.private_data
        assert raw_node.private_test_data == split_node.private_test_data
def test_train_all_clients():
    """Checks that train_all_clients trains each node's model exactly once with that node's data and labels."""
    model_builder = Mock
    aggregator = Mock()

    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, test_data, test_labels = distribution.get_federated_data(num_nodes)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)
    fdg.train_all_clients()

    # Access is opened only after training so the node data can be read for the comparison.
    fdg._federated_data.configure_data_access(UnprotectedAccess())
    for node in fdg._federated_data:
        node_data = node.query()
        node._model.train.assert_called_once_with(node_data.data, node_data.label)
def test_federated_poisoning_attack():
    """
    Checks FederatedPoisoningDataAttack: nodes listed as adversaries must end up with labels that
    differ from the original ones, while every other node must keep its labels.
    """
    num_nodes = 10
    federated_data = FederatedData()

    # Labels of each node before the attack, indexed by node position.
    original_labels = []
    for _ in range(num_nodes):
        data = np.random.rand(50).reshape([10, 5])
        label = np.random.randint(0, 10, 10)
        original_labels.append(label)
        federated_data.add_data_node(LabeledData(data, label))

    attack = FederatedPoisoningDataAttack(percentage=10)
    attack.apply_attack(federated_data=federated_data)
    adversaries_idx = attack.adversaries

    federated_data.configure_data_access(UnprotectedAccess())
    for idx, node in enumerate(federated_data):
        labels_unchanged = np.array_equal(node.query().label, original_labels[idx])
        if idx in adversaries_idx:
            assert not labels_unchanged
        else:
            assert labels_unchanged
def test_constructor_bad_params():
    """Checks that AdaptiveDifferentialPrivacy raises ValueError for each invalid argument set below."""
    # A three-element tuple, a negative first component and a negative second component.
    rejected_budgets = [(1, 2, 3), (-1, 2), (1, -2)]
    for epsilon_delta in rejected_budgets:
        with pytest.raises(ValueError):
            AdaptiveDifferentialPrivacy(epsilon_delta=epsilon_delta)

    # UnprotectedAccess passed as the differentially private mechanism must also be rejected.
    with pytest.raises(ValueError):
        AdaptiveDifferentialPrivacy(epsilon_delta=(1, 1), differentially_private_mechanism=UnprotectedAccess())
class DataNode:
    """
    This class represents an independent data node.

    A DataNode has its own private data and provides methods
    to initialize this data and access to it. The access to private data needs to be configured with an access policy
    before querying it or an exception will be raised. A method to transform private data is also provided. This is
    a mechanism that allows data preprocessing or related tasks over data.

    A model (see: [Model](../../model)) can be deployed in the DataNode and use private data
    in order to learn. It is assumed that a model is represented by its parameters and the access to these parameters
    must be also configured before queries.

    # Properties:
        model: prints the model (the getter does not return it); the setter stores a deep copy
        private_data: prints the train data (the getter does not return it)
        private_test_data: prints the test data (the getter does not return it)
    """
    def __init__(self):
        # Private stores are dictionaries indexed by a caller-chosen key.
        self._private_data = {}
        self._private_test_data = {}
        # One access policy per private data key; a key without policy cannot be queried.
        self._private_data_access_policies = {}
        self._model = None
        self._model_access_policy = UnprotectedAccess()

    @property
    def model(self):
        """
        Prints the type and the content of the model. The model itself is not returned;
        use query_model_params to obtain its parameters.
        """
        print(
            "You can't get the model, you need to query the params to access")
        print(type(self._model))
        print(self._model)

    @model.setter
    def model(self, model):
        """
        Sets the model to use in the node. A deep copy of the given object is stored.

        # Arguments:
            model: Instance of a class implementing ~TrainableModel
        """
        self._model = copy.deepcopy(model)

    @property
    def private_data(self):
        """
        Allows to see data for this node, but you cannot retrieve data: the content is
        printed and nothing is returned.
        """
        print(
            "Node private data, you can see the data for debug purposes but the data remains in the node"
        )
        print(type(self._private_data))
        print(self._private_data)

    @property
    def private_test_data(self):
        """
        Allows to see test data for this node, but you cannot retrieve data: the content is
        printed and nothing is returned.
        """
        print(
            "Node private test data, you can see the data for debug purposes but the data remains in the node"
        )
        print(type(self._private_test_data))
        print(self._private_test_data)

    def set_private_data(self, name, data):
        """
        Creates copy of data in private memory using name as key. If there is a previous value with this key the
        data will be overridden.

        # Arguments:
            name: String with the key identifier for the data
            data: Data to be stored in the private memory of the DataNode
        """
        self._private_data[name] = copy.deepcopy(data)

    def set_private_test_data(self, name, data):
        """
        Creates copy of test data in private memory using name as key. If there is a previous value with this key the
        data will be overridden.

        # Arguments:
            name: String with the key identifier for the data
            data: Data to be stored in the private memory of the DataNode
        """
        self._private_test_data[name] = copy.deepcopy(data)

    def configure_data_access(self, name, data_access_definition):
        """
        Adds a DataAccessDefinition for some concrete private data. A deep copy of the policy is stored.

        # Arguments:
            name: Identifier for the data that will be configured
            data_access_definition: Policy to access data (see: [DataAccessDefinition](../data/#dataaccessdefinition-class))
        """
        self._private_data_access_policies[name] = copy.deepcopy(
            data_access_definition)

    def configure_model_params_access(self, data_access_definition):
        """
        Adds a DataAccessDefinition for model parameters. A deep copy of the policy is stored.

        # Arguments:
            data_access_definition: Policy to access parameters \
            (see: [DataAccessDefinition](../data/#dataaccessdefinition-class))
        """
        self._model_access_policy = copy.deepcopy(data_access_definition)

    def apply_data_transformation(self, private_property,
                                  federated_transformation):
        """
        Executes FederatedTransformation (see: [Federated Operation](../federated_operation)) over private data.
        The stored object itself is handed to the transformation.

        # Arguments:
            private_property: Identifier for the data that will be transformed
            federated_transformation: Operation to execute (see: [Federated Operation](../federated_operation))
        """
        federated_transformation.apply(self._private_data[private_property])

    def query(self, private_property, **kwargs):
        """
        Queries private data previously configured. If the access was not configured this method raises an exception.

        # Arguments:
            private_property: String with the key identifier for the data
            **kwargs: Extra arguments forwarded to the access policy

        # Returns:
            result: Whatever the configured access policy returns for the stored data

        # Raises:
            ValueError: If no access policy was configured for private_property
        """
        if private_property not in self._private_data_access_policies:
            raise ValueError(
                "Data access must be configured before query data")

        data_access_policy = self._private_data_access_policies[
            private_property]
        return data_access_policy.apply(self._private_data[private_property],
                                        **kwargs)

    def query_model_params(self):
        """
        Queries model parameters. By default the parameters access is unprotected but access definition can be changed

        # Returns:
            params: Result of applying the model access policy to the model parameters
        """
        return self._model_access_policy.apply(self._model.get_model_params())

    def set_model_params(self, model_params):
        """
        Sets the parameters of the model deployed in the node. A deep copy of the parameters is passed to the model.

        # Arguments:
            model_params: Parameters to set in the model
        """
        self._model.set_model_params(copy.deepcopy(model_params))

    def train_model(self, training_data_key):
        """
        Train the model that has been previously set in the data node

        # Arguments:
            training_data_key: String identifying the private data to use for this model. This key must contain \
            LabeledData (see: [LabeledData](../data/#labeleddata))

        # Raises:
            ValueError: If the private data under the key lacks 'data' or 'label' (this includes a missing key)
        """
        labeled_data = self._private_data.get(training_data_key)
        if not hasattr(labeled_data, 'data') or not hasattr(
                labeled_data, 'label'):
            raise ValueError(
                "Private data needs to have 'data' and 'label' to train a model"
            )
        self._model.train(labeled_data.data, labeled_data.label)

    def predict(self, data):
        """
        Uses the model to predict new data

        # Arguments:
            data: Data to predict

        # Returns:
            predictions: array with predictions for data argument.
        """
        return self._model.predict(data)

    def evaluate(self, data, labels):
        """
        Evaluates the performance of the model

        # Arguments:
            data: Data to predict
            labels: True values of data

        # Returns:
            metrics: array with metrics values for predictions for data argument.
        """
        return self._model.evaluate(data, labels)

    def performance(self, data, labels):
        """
        Evaluates the performance of the model in terms of the most representative metric.

        # Arguments:
            data: Data to predict
            labels: True values of data

        # Returns:
            metric: return the main metric value
        """
        return self._model.performance(data, labels)

    def local_evaluate(self, data_key):
        """
        Evaluation of local models on local data test

        # Arguments:
            data_key: key of the private data of the client

        # Returns:
            metrics: result of the model evaluation over the test data stored under data_key, or None \
            when the node holds no test data for that key
        """
        # A single lookup covers both an empty test store and a key that was never set;
        # without it a missing key would fail with AttributeError on None.
        labeled_data = self._private_test_data.get(data_key)
        if labeled_data is None:
            return None
        return self._model.evaluate(labeled_data.data, labeled_data.label)
 def __init__(self):
     """Starts with no model, an UnprotectedAccess policy for model parameters and empty private stores."""
     self._model = None
     self._model_access_policy = UnprotectedAccess()
     # Access policies and both data stores start as empty dictionaries.
     self._private_data_access_policies = {}
     self._private_data = {}
     self._private_test_data = {}