def test_labeled_data():
    data = np.random.rand(10)
    label = np.random.rand(1)
    labeled_data = LabeledData(data, label)
    np.testing.assert_array_equal(labeled_data.data, data)
    assert labeled_data.label == label
    new_data = np.random.rand(10)
    labeled_data.data = new_data
    np.testing.assert_array_equal(labeled_data.data, new_data)
    new_label = np.random.rand(1)
    labeled_data.label = new_label
    assert labeled_data.label == new_label
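The test above only relies on LabeledData being a small container that exposes data and label through readable and writable properties. A minimal sketch of such a container, inferred from the assertions rather than copied from the shfl source:

class LabeledData:
    """Minimal sketch: pairs a data array with its label."""

    def __init__(self, data, label):
        self._data = data
        self._label = label

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        self._data = data

    @property
    def label(self):
        return self._label

    @label.setter
    def label(self, label):
        self._label = label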
Example #2
    def split_train_test(self, test_split=0.2):
        """
        Splits private_data into train and test sets.

        # Arguments:
            test_split: Fraction of the data (between 0 and 1) to use as the test set (default 0.2)
        """
        labeled_data = self._private_data.get(self._federated_data_identifier)
        length = len(labeled_data.data)
        train_data = labeled_data.data[int(test_split * length):]
        train_label = labeled_data.label[int(test_split * length):]
        test_data = labeled_data.data[:int(test_split * length)]
        test_label = labeled_data.label[:int(test_split * length)]

        self.set_private_data(LabeledData(train_data, train_label))
        self.set_private_test_data(LabeledData(test_data, test_label))
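A hedged usage sketch of this method: each node in a FederatedData splits its own private data, keeping the first test_split fraction as a private test set and the rest as training data. The construction calls mirror the tests later in this listing; exact import paths are left out because they are an assumption here.

import numpy as np

# FederatedData and LabeledData are the shfl classes used throughout these
# examples; their imports are omitted to avoid guessing exact module paths.
federated_data = FederatedData()
federated_data.add_data_node(LabeledData(np.arange(10), np.arange(10)))

for node in federated_data:
    # With 10 samples and test_split=0.2 the slice boundary is int(0.2 * 10) = 2:
    # samples 0-1 become the node's private test set, samples 2-9 its train set.
    node.split_train_test(test_split=0.2)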
Example #3
    def get_federated_data(self,
                           num_nodes,
                           percent=100,
                           weights=None,
                           sampling="without_replacement"):
        """
        Method that splits the whole data among the established number of nodes.

        # Arguments:
            num_nodes: Number of nodes to create
            percent: Percent of the data (between 0 and 100) to be distributed (default is 100)
            weights: Array of weights for weighted distribution (default is None)
            sampling: Sampling methodology, either "with_replacement" or "without_replacement" (default "without_replacement")

        # Returns:
              * **federated_data, test_data, test_label**
        """

        train_data, train_label = self._database.train
        test_data, test_label = self._database.test

        federated_train_data, federated_train_label = self.make_data_federated(
            train_data, train_label, num_nodes, percent, weights, sampling)

        federated_data = FederatedData()
        for node in range(num_nodes):
            node_data = LabeledData(federated_train_data[node],
                                    federated_train_label[node])
            federated_data.add_data_node(node_data)

        return federated_data, test_data, test_label
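make_data_federated is not shown in this example. A hedged sketch of what an IID implementation matching this call signature could look like; the shuffling, slicing, and weight handling below are assumptions, only the signature and defaults are taken from the code above:

import numpy as np


def make_data_federated(data, labels, num_nodes, percent=100,
                        weights=None, sampling="without_replacement"):
    """Sketch: sample `percent` percent of the data, then slice it across
    `num_nodes` nodes according to `weights` (uniform when None)."""
    num_samples = int(len(data) * percent / 100)
    if weights is None:
        weights = np.full(num_nodes, 1 / num_nodes)

    replace = sampling == "with_replacement"
    indices = np.random.choice(len(data), num_samples, replace=replace)
    data, labels = data[indices], labels[indices]

    federated_data, federated_labels = [], []
    start = 0
    for weight in weights:
        node_size = int(weight * num_samples)
        federated_data.append(data[start:start + node_size])
        federated_labels.append(labels[start:start + node_size])
        start += node_size
    return federated_data, federated_labels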
    def get_federated_data(self, percent=100, *args, **kwargs):
        """
        Method that splits the whole data between the established number of nodes.

        # Arguments:
            percent: Percent of the data (between 0 and 100) to be distributed (default is 100)
            *args, **kwargs: Additional arguments forwarded to make_data_federated

        # Returns:
              * **federated_data, test_data, test_label**
        """

        train_data, train_label = self._database.train
        test_data, test_label = self._database.test

        federated_train_data, federated_train_label = self.make_data_federated(
            train_data, train_label, percent, *args, **kwargs)

        federated_data = FederatedData()
        num_nodes = len(federated_train_label)
        for node in range(num_nodes):
            node_data = LabeledData(federated_train_data[node],
                                    federated_train_label[node])
            federated_data.add_data_node(node_data)

        return federated_data, test_data, test_label
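A hedged end-to-end usage sketch for either variant of get_federated_data: a data distribution object wraps a loaded database and returns the per-node federated data plus the centralized test set. The Emnist and IidDataDistribution names are assumptions about the surrounding library, and their import paths are omitted for that reason.

# Emnist and IidDataDistribution are assumed names for a shfl database and an
# IID data distribution; substitute whichever database/distribution you use.
database = Emnist()
database.load_data()

distribution = IidDataDistribution(database)
federated_data, test_data, test_label = distribution.get_federated_data(num_nodes=5)

# federated_data holds one LabeledData node per client; test_data/test_label
# stay centralized so a global model can be evaluated after training.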
Example #5
@patch('shfl.private.node.copy.deepcopy')
def test_train_model_data(mock_deepcopy):
    # `patch` and `Mock` come from unittest.mock; the patch target assumes
    # DataNode lives in shfl.private.node and deep-copies the model it is given.
    random_array = np.random.rand(30)
    random_array_labels = np.random.rand(30)
    labeled_data = LabeledData(random_array, random_array_labels)
    data_node = DataNode()
    model_mock = Mock()
    copy_mock = Mock()
    mock_deepcopy.return_value = copy_mock
    data_node.model = model_mock
    data_node.set_private_data("random_array", labeled_data)
    data_node.train_model("random_array")
    # The node trains its private copy, never the original mock.
    model_mock.train.assert_not_called()
    copy_mock.train.assert_called_once()
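The pair of assertions only holds if DataNode trains a private deep copy of the model instead of the object it was handed, which is why deepcopy is patched above. A minimal sketch of that copy-on-assignment behaviour, inferred from the test rather than copied from the shfl source; DataNodeSketch and FakeModel are illustrative names.

import copy


class DataNodeSketch:
    """Hedged sketch of the copy-on-assignment behaviour the test relies on."""

    # No getter is defined: the node keeps its trained copy to itself.
    model = property()

    @model.setter
    def model(self, model):
        # Storing a deep copy means the caller's object is never trained
        # directly, which is why model_mock.train is expected not to be called.
        self._model = copy.deepcopy(model)

    def train_model(self, data_key):
        # Training happens on the private copy only (private-data lookup omitted).
        self._model.train(data_key)


class FakeModel:
    """Tiny stand-in model that just counts train() calls."""

    def __init__(self):
        self.train_calls = 0

    def train(self, data_key):
        self.train_calls += 1


original = FakeModel()
node = DataNodeSketch()
node.model = original
node.train_model("some_data_key")
assert original.train_calls == 0  # the original model object is never trained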
def test_shuffle_node():
    data = np.random.rand(50).reshape([10, 5])
    label = np.random.randint(0, 10, 10)
    labeled_data = LabeledData(data, label)

    federated_data = FederatedData()
    federated_data.add_data_node(labeled_data)
    for node in federated_data:
        node.apply_data_transformation(ShuffleNode())

    federated_data.configure_data_access(UnprotectedAccess())
    assert (not np.array_equal(federated_data[0].query().label, label))
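ShuffleNode acts here as a federated transformation that permutes a node's labels in place, which is what the assertion checks. A hedged sketch of such a transformation, assuming apply_data_transformation hands the node's LabeledData to an apply method:

import numpy as np


class ShuffleNodeSketch:
    """Hedged sketch of a label-shuffling transformation."""

    def apply(self, labeled_data):
        # apply() is assumed to receive the node's LabeledData and mutate it;
        # permuting the labels breaks the data-label correspondence.
        permutation = np.random.permutation(len(labeled_data.label))
        labeled_data.label = labeled_data.label[permutation]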
def test_split_train_test_pandas():
    num_nodes = 10
    data = pd.DataFrame(np.random.rand(10, num_nodes))
    label = pd.Series(np.random.randint(0, 10, num_nodes))

    federated_data = FederatedData()
    for idx in range(num_nodes):
        federated_data.add_data_node(LabeledData(data[idx], to_categorical(label[idx])))

    federated_data.configure_data_access(UnprotectedAccess())
    # Deep-copy before splitting so the original, unsplit nodes can be compared
    # against the split result below (a plain assignment would alias the same object).
    raw_federated_data = copy.deepcopy(federated_data)

    shfl.private.federated_operation.split_train_test(federated_data)

    for raw_node, split_node in zip(raw_federated_data, federated_data):
        raw_node.split_train_test()
        assert raw_node.private_data == split_node.private_data
        assert raw_node.private_test_data == split_node.private_test_data
def test_federated_poisoning_attack():
    num_nodes = 10
    federated_data = FederatedData()

    list_labels = []
    for i in range(num_nodes):
        data = np.random.rand(50).reshape([10, 5])
        label = np.random.randint(0, 10, 10)
        list_labels.append(label)
        labeled_data = LabeledData(data, label)
        federated_data.add_data_node(labeled_data)

    percentage = 10
    simple_attack = FederatedPoisoningDataAttack(percentage=percentage)
    simple_attack.apply_attack(federated_data=federated_data)

    adversaries_idx = simple_attack.adversaries

    federated_data.configure_data_access(UnprotectedAccess())
    for node, idx in zip(federated_data, range(num_nodes)):
        if idx in adversaries_idx:
            assert not np.array_equal(node.query().label, list_labels[idx])
        else:
            assert np.array_equal(node.query().label, list_labels[idx])
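A hedged sketch of the attack exercised above: choose percentage percent of the nodes at random as adversaries and shuffle their labels, for example by applying the ShuffleNode transformation from the earlier test, leaving the remaining nodes untouched. Only the constructor keyword, the apply_attack call, and the adversaries attribute come from the test; everything else is an assumption.

import numpy as np


class FederatedPoisoningSketch:
    """Hedged sketch of a label-flipping data poisoning attack."""

    def __init__(self, percentage):
        self._percentage = percentage
        self.adversaries = []

    def apply_attack(self, federated_data):
        nodes = list(federated_data)  # FederatedData is iterable (see the tests above)
        num_adversaries = round(len(nodes) * self._percentage / 100)
        self.adversaries = np.random.choice(len(nodes), num_adversaries,
                                            replace=False)
        for idx in self.adversaries:
            # ShuffleNode is the label-shuffling transformation used in
            # test_shuffle_node above.
            nodes[idx].apply_data_transformation(ShuffleNode())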