def test_labeled_data():
    """
    Checks that LabeledData exposes the data and label it was built with,
    and that both attributes can be reassigned afterwards.
    """
    initial_data = np.random.rand(10)
    initial_label = np.random.rand(1)
    sample = LabeledData(initial_data, initial_label)

    # Values passed to the constructor must be readable back element-wise.
    for position, expected in enumerate(initial_data):
        assert sample.data[position] == expected
    assert sample.label == initial_label

    # Reassigning the data attribute must be reflected on the next read.
    replacement_data = np.random.rand(10)
    sample.data = replacement_data
    for position, expected in enumerate(replacement_data):
        assert sample.data[position] == expected

    # Same for the label attribute.
    replacement_label = np.random.rand(1)
    sample.label = replacement_label
    assert sample.label == replacement_label
def split_train_test(self, test_split=0.2):
    """
    Splits the node's private data into a train set and a test set.

    The first `int(test_split * n)` samples (with `n` the number of samples)
    become the test set; the remaining samples become the train set.
    The train set replaces the private data and the test set is stored
    as private test data.

    # Arguments:
        test_split: fraction of the samples assigned to the test set
            (default is 0.2)
    """
    source = self._private_data.get(self._federated_data_identifier)

    # Single cut point shared by data and labels so they stay aligned.
    cut = int(test_split * len(source.data))
    train_part = slice(cut, None)
    test_part = slice(None, cut)

    train_data = source.data[train_part]
    train_label = source.label[train_part]
    test_data = source.data[test_part]
    test_label = source.label[test_part]

    self.set_private_data(LabeledData(train_data, train_label))
    self.set_private_test_data(LabeledData(test_data, test_label))
def get_federated_data(self, num_nodes, percent=100, weights=None,
                       sampling="without_replacement"):
    """
    Distributes the training data of the database among a number of nodes.

    The train split is partitioned by `make_data_federated` and each
    partition is wrapped in a LabeledData and added as a node of a new
    FederatedData object. The test split is returned untouched.

    # Arguments:
        num_nodes: Number of nodes to create
        percent: Percent of the data (between 0 and 100) to be distributed
            (default is 100)
        weights: Array of weights for weighted distribution
            (default is None)
        sampling: Sampling methodology, forwarded to `make_data_federated`
            (default is "without_replacement")

    # Returns:
        * **federated_data, test_data, test_label**
    """
    train_data, train_label = self._database.train
    test_data, test_label = self._database.test

    data_parts, label_parts = self.make_data_federated(
        train_data, train_label, num_nodes, percent, weights, sampling)

    federated_data = FederatedData()
    for idx in range(num_nodes):
        federated_data.add_data_node(
            LabeledData(data_parts[idx], label_parts[idx]))

    return federated_data, test_data, test_label
def get_federated_data(self, percent=100, *args, **kwargs):
    """
    Distributes the training data of the database among nodes.

    The train split is partitioned by `make_data_federated`; one node is
    created per returned label partition, so the number of nodes is
    decided by that method rather than passed in here. The test split is
    returned untouched.

    # Arguments:
        percent: Percent of the data (between 0 and 100) to be distributed
            (default is 100)
        *args: Extra positional arguments forwarded to
            `make_data_federated`
        **kwargs: Extra keyword arguments forwarded to
            `make_data_federated`

    # Returns:
        * **federated_data, test_data, test_label**
    """
    train_data, train_label = self._database.train
    test_data, test_label = self._database.test

    data_parts, label_parts = self.make_data_federated(
        train_data, train_label, percent, *args, **kwargs)

    federated_data = FederatedData()
    # The label partitions define how many nodes are created.
    for idx in range(len(label_parts)):
        federated_data.add_data_node(
            LabeledData(data_parts[idx], label_parts[idx]))

    return federated_data, test_data, test_label
def test_train_model_data():
    """
    Trains a DataNode on its private data and checks which mock received
    the `train` call: the mock assigned to the node must not be trained,
    while `copy_mock.train` must be called exactly once.
    """
    features = np.random.rand(30)
    targets = np.random.rand(30)
    private_sample = LabeledData(features, targets)

    assigned_model = Mock()
    node = DataNode()
    node.model = assigned_model

    data_key = "random_array"
    node.set_private_data(data_key, private_sample)
    node.train_model(data_key)

    assigned_model.train.assert_not_called()
    # NOTE(review): `copy_mock` is not defined inside this function; it has
    # to come from module scope or a patch decorator outside this view.
    # Confirm, otherwise this line raises NameError.
    copy_mock.train.assert_called_once()
def test_shuffle_node():
    """
    Applies the ShuffleNode transformation to a single-node FederatedData
    and checks that the labels read back differ from the original ones.
    """
    features = np.random.rand(50).reshape((10, 5))
    original_labels = np.random.randint(0, 10, 10)

    federated = FederatedData()
    federated.add_data_node(LabeledData(features, original_labels))

    shuffle = ShuffleNode
    for node in federated:
        node.apply_data_transformation(shuffle())

    federated.configure_data_access(UnprotectedAccess())
    labels_after = federated[0].query().label

    # NOTE(review): presumably the transformation permutes the labels; a
    # permutation that leaves the label order unchanged would make this
    # assertion fail by chance -- confirm whether that is acceptable.
    assert not np.array_equal(labels_after, original_labels)
def test_split_train_test_pandas():
    """
    Builds a FederatedData whose nodes hold pandas data and checks that the
    federated `split_train_test` operation leaves every node with the same
    private train/test data as calling `split_train_test` on the node.
    """
    num_nodes = 10
    data = pd.DataFrame(np.random.rand(10, num_nodes))
    # One integer label in [0, 10) per node. The previous call passed
    # `range(0, 10)` as `low` and `num_nodes` as `high`, which only worked
    # because num_nodes happened to be 10 and raised for smaller values.
    label = pd.Series(np.random.randint(0, 10, num_nodes))

    federated_data = FederatedData()
    for idx in range(num_nodes):
        # data[idx] selects column `idx` of the DataFrame.
        federated_data.add_data_node(
            LabeledData(data[idx], to_categorical(label[idx])))
    federated_data.configure_data_access(UnprotectedAccess())

    # NOTE(review): this binds a second name to the same object, it does
    # not copy it, so both sides of the comparisons below are the same
    # nodes. Confirm whether an independent copy was intended.
    raw_federated_data = federated_data
    shfl.private.federated_operation.split_train_test(federated_data)

    for raw_node, split_node in zip(raw_federated_data, federated_data):
        raw_node.split_train_test()
        assert raw_node.private_data == split_node.private_data
        assert raw_node.private_test_data == split_node.private_test_data
def test_federated_poisoning_attack():
    """
    Applies FederatedPoisoningDataAttack to a FederatedData of ten nodes and
    checks that only the nodes listed as adversaries have labels differing
    from the ones they were created with.
    """
    num_nodes = 10
    federated_data = FederatedData()
    original_labels = []
    for _ in range(num_nodes):
        features = np.random.rand(50).reshape((10, 5))
        targets = np.random.randint(0, 10, 10)
        original_labels.append(targets)
        federated_data.add_data_node(LabeledData(features, targets))

    percentage = 10
    attack = FederatedPoisoningDataAttack(percentage=percentage)
    attack.apply_attack(federated_data=federated_data)
    adversaries = attack.adversaries

    federated_data.configure_data_access(UnprotectedAccess())
    for idx, node in enumerate(federated_data):
        labels_intact = np.array_equal(node.query().label,
                                       original_labels[idx])
        if idx in adversaries:
            # Adversarial nodes are expected to hold altered labels.
            assert not labels_intact
        else:
            assert labels_intact