def test_make_data_federated_wrong_weights():
    """Federate IID data using weights that do not sum to one.

    The weights ``[0.5, 0.5, 0.5]`` are passed to ``make_data_federated``
    unchanged; the expected node sizes are then computed from the same
    weights rescaled so that they sum to one.
    """
    database = TestDataBase()
    database.load_data()
    data_distribution = IidDataDistribution(database)

    train_data, train_label = data_distribution._database.train

    num_nodes = 3
    percent = 60
    raw_weights = [0.5, 0.5, 0.5]

    federated_data, federated_label = data_distribution.make_data_federated(
        train_data, train_label, percent, num_nodes, raw_weights)

    # Expected shares: the raw weights rescaled to add up to one.
    expected_weights = np.array(
        [float(w) / sum(raw_weights) for w in raw_weights])
    data_distribution.get_federated_data(3)

    all_data = np.concatenate(federated_data)
    all_label = np.concatenate(federated_label)

    # For each federated row, the index of the first matching training row.
    idx = [
        np.where((row == train_data).all(axis=1))[0][0]
        for row in all_data
    ]

    expected_total = int(percent * train_data.shape[0] / 100)

    for node, share in enumerate(expected_weights):
        assert federated_data[node].shape[0] == int(share * expected_total)

    assert all_data.shape[0] == expected_total
    assert num_nodes == len(federated_data) == len(federated_label)
    assert (np.sort(all_data.ravel())
            == np.sort(train_data[idx, ].ravel())).all()
    assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all()
def test_make_data_federated():
    """Federate IID data with explicit weights, with both sampling modes.

    The same set of checks is run on the result of the default call and on
    the result of a call with ``sampling="with_replacement"``.
    """
    database = TestDataBase()
    database.load_data()
    data_distribution = IidDataDistribution(database)

    train_data, train_label = data_distribution._database.train

    num_nodes = 3
    percent = 60
    weights = [0.5, 0.25, 0.25]
    expected_total = int(percent * train_data.shape[0] / 100)

    def check_partition(federated_data, federated_label):
        # Shared assertions on node sizes and on the origin of every row.
        all_data = np.concatenate(federated_data)
        all_label = np.concatenate(federated_label)

        idx = [
            np.where((row == train_data).all(axis=1))[0][0]
            for row in all_data
        ]

        for node, share in enumerate(weights):
            assert federated_data[node].shape[0] == int(
                share * expected_total)

        assert all_data.shape[0] == expected_total
        assert num_nodes == federated_data.shape[0] == federated_label.shape[0]
        assert (np.sort(all_data.ravel())
                == np.sort(train_data[idx, ].ravel())).all()
        assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all()

    # NOTE(review): num_nodes is passed before percent here - confirm this
    # matches the positional order of make_data_federated.
    federated_data, federated_label = data_distribution.make_data_federated(
        train_data, train_label, num_nodes, percent, weights)
    data_distribution.get_federated_data(3)
    check_partition(federated_data, federated_label)

    # Same checks when sampling with replacement.
    federated_data, federated_label = data_distribution.make_data_federated(
        train_data, train_label, num_nodes, percent, weights,
        sampling="with_replacement")
    check_partition(federated_data, federated_label)
def __init__(self, data_base_name_key, iid=True, num_nodes=20, percent=100):
    """Set up the federated government for one of the ``ImagesDataBases``.

    Args:
        data_base_name_key: Member name looked up in ``ImagesDataBases``.
        iid: When truthy, ``IidDataDistribution`` is used; otherwise
            ``NonIidDataDistribution``.
        num_nodes: Forwarded to ``get_federated_data``.
        percent: Forwarded to ``get_federated_data``.

    When the key is not a member of ``ImagesDataBases`` the available names
    are printed, ``_test_data`` is set to ``None`` and the parent
    initializer is not called.
    """
    known_data_bases = ImagesDataBases.__members__
    if data_base_name_key not in known_data_bases:
        available = ", ".join([member.name for member in ImagesDataBases])
        print("The data base name is not included. Try with: " + available)
        self._test_data = None
        return

    data_base = known_data_bases[data_base_name_key].value()
    train_data, _train_labels, _test_data, _test_labels = \
        data_base.load_data()

    distribution_cls = IidDataDistribution if iid else NonIidDataDistribution
    distribution = distribution_cls(data_base)

    federated_data, self._test_data, self._test_labels = \
        distribution.get_federated_data(num_nodes=num_nodes, percent=percent)

    # Reshape every node first, then normalise with the statistics of the
    # loaded training data.
    apply_federated_transformation(federated_data, Reshape())
    mean = np.mean(train_data.data)
    std = np.std(train_data.data)
    apply_federated_transformation(federated_data, Normalize(mean, std))

    super().__init__(self.model_builder, federated_data, FedAvgAggregator())
def test_IowaFederatedGovernment():
    """Check that the constructor stores its collaborators and IOWA settings.

    The aggregator must be an ``IowaFederatedAggregator``, the model an
    instance of the builder, and every keyword setting must be readable back
    from the matching underscore-prefixed attribute.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, _test_data, _test_labels = \
        distribution.get_federated_data(num_nodes)

    settings = {"dynamic": True, "a": 0, "b": 1, "c": 2, "y_b": 3, "k": 4}

    iowa_fg = IowaFederatedGovernment(model_builder,
                                      federated_data,
                                      model_params_access=None,
                                      **settings)

    assert isinstance(iowa_fg._aggregator, IowaFederatedAggregator)
    assert isinstance(iowa_fg._model, model_builder)
    assert np.array_equal(iowa_fg._federated_data, federated_data)

    # Each setting is stored under "_<name>".
    for name in ("a", "b", "c", "y_b", "k", "dynamic"):
        assert getattr(iowa_fg, "_" + name) == settings[name]
def test_run_rounds_local_tests():
    """Check that one round calls each stage once after a local split.

    Every stage of the government is replaced by a ``Mock`` so only the
    orchestration performed by ``run_rounds`` is exercised.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, test_data, test_labels = \
        distribution.get_federated_data(num_nodes)

    split_train_test(federated_data)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)

    # Stub out every stage that run_rounds is expected to call.
    for stage in ("deploy_central_model", "train_all_clients",
                  "evaluate_clients", "aggregate_weights",
                  "evaluate_global_model"):
        setattr(fdg, stage, Mock())

    fdg.run_rounds(1, test_data, test_labels)

    fdg.deploy_central_model.assert_called_once()
    fdg.train_all_clients.assert_called_once()
    fdg.evaluate_clients.assert_called_once_with(test_data, test_labels)
    fdg.aggregate_weights.assert_called_once()
    fdg.evaluate_global_model.assert_called_once_with(test_data, test_labels)
def __init__(self, data_base_name_key, iid=True, num_nodes=20, percent=100):
    """Set up the federated government for one of the ``ClusteringDataBases``.

    Args:
        data_base_name_key: Member name looked up in ``ClusteringDataBases``.
        iid: When truthy, ``IidDataDistribution`` is used; otherwise
            ``NonIidDataDistribution``.
        num_nodes: Forwarded to ``get_federated_data``.
        percent: Forwarded to ``get_federated_data``.

    When the key is not a member of ``ClusteringDataBases`` the available
    names are printed, ``_test_data`` is set to ``None`` and the parent
    initializer is not called.
    """
    known_data_bases = ClusteringDataBases.__members__
    if data_base_name_key not in known_data_bases:
        available = ", ".join([member.name for member in ClusteringDataBases])
        print("The data base name is not included. Try with: " + available)
        self._test_data = None
        return

    data_base = known_data_bases[data_base_name_key].value()
    train_data, train_labels, _test_data, _test_labels = \
        data_base.load_data()

    # Cluster count is the number of distinct training labels; feature count
    # is the second dimension of the training data.
    self._num_clusters = len(np.unique(train_labels))
    self._num_features = train_data.shape[1]

    distribution_cls = IidDataDistribution if iid else NonIidDataDistribution
    distribution = distribution_cls(data_base)

    federated_data, self._test_data, self._test_labels = \
        distribution.get_federated_data(num_nodes=num_nodes, percent=percent)

    super().__init__(self.model_builder, federated_data,
                     ClusterFedAvgAggregator())
def test_make_data_federated_pandas():
    """Federate pandas-backed IID data, with both sampling modes.

    The same set of checks is run on the result of the default call and on
    the result of a call with ``sampling="with_replacement"``.
    """
    database = TestDataBasePandas()
    database.load_data()
    data_distribution = IidDataDistribution(database)

    train_data, train_label = data_distribution._database.train

    num_nodes = 3
    percent = 60
    weights = [0.5, 0.25, 0.25]
    expected_total = int(percent * train_data.shape[0] / 100)

    def check_partition(federated_data, federated_label):
        # Shared assertions on node sizes and on the content of every row.
        all_data = pd.concat(federated_data)
        all_label = pd.concat(federated_label)

        for node, share in enumerate(weights):
            assert federated_data[node].shape[0] == int(
                share * expected_total)

        assert all_data.shape[0] == expected_total
        assert num_nodes == len(federated_data) == len(federated_label)

        # Rows are compared against the training rows selected positionally
        # by the concatenated frame's index values.
        positions = all_data.index.values
        pd.testing.assert_frame_equal(all_data, train_data.iloc[positions])
        pd.testing.assert_frame_equal(all_label, train_label.iloc[positions])

    federated_data, federated_label = data_distribution.make_data_federated(
        train_data, train_label, percent, num_nodes, weights)
    data_distribution.get_federated_data(3)
    check_partition(federated_data, federated_label)

    # Same checks when sampling with replacement.
    federated_data, federated_label = data_distribution.make_data_federated(
        train_data, train_label, percent, num_nodes, weights,
        sampling="with_replacement")
    check_partition(federated_data, federated_label)
def test_run_rounds():
    """Check that one IOWA round calls every stage with the expected data.

    All stages of the government are replaced by mocks. After the round the
    test rebuilds the shuffled test/validation split and compares it with
    the arguments the mocks received.

    NOTE(review): the hard-coded permutation presumably reproduces the
    shuffle ``run_rounds`` performs under ``np.random.seed(123)`` - confirm
    against the implementation if the random source changes.
    """
    np.random.seed(123)
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    db = IidDataDistribution(database)

    num_nodes = 3
    federated_data, test_data, test_label = db.get_federated_data(num_nodes)

    iowa_fg = IowaFederatedGovernment(model_builder, federated_data)
    n = 1

    iowa_fg.deploy_central_model = Mock()
    iowa_fg.train_all_clients = Mock()
    iowa_fg.evaluate_clients = Mock()
    iowa_fg.performance_clients = Mock()
    iowa_fg.performance_clients.return_value = 0
    iowa_fg._aggregator.set_ponderation = Mock()
    iowa_fg.aggregate_weights = Mock()
    iowa_fg.evaluate_global_model = Mock()

    iowa_fg.run_rounds(n, test_data, test_label)

    # Replicate the test and validation data: the first 15% of the permuted
    # samples form the validation set, the rest the test set.
    randomize = [0, 9, 3, 4, 6, 8, 2, 1, 5, 7]
    test_data = test_data[randomize, ]
    test_label = test_label[randomize]

    split = int(0.15 * len(test_label))
    validation_data = test_data[:split, ]
    validation_label = test_label[:split]
    test_data = test_data[split:, ]
    test_label = test_label[split:]

    iowa_fg.deploy_central_model.assert_called_once()
    iowa_fg.train_all_clients.assert_called_once()

    iowa_fg.evaluate_clients.assert_called_once()
    assert len(iowa_fg.evaluate_clients.call_args[0]) == 2
    np.testing.assert_array_equal(
        iowa_fg.evaluate_clients.call_args[0][0], test_data)
    np.testing.assert_array_equal(
        iowa_fg.evaluate_clients.call_args[0][1], test_label)

    iowa_fg.performance_clients.assert_called_once()
    assert len(iowa_fg.performance_clients.call_args[0]) == 2
    np.testing.assert_array_equal(
        iowa_fg.performance_clients.call_args[0][0], validation_data)
    np.testing.assert_array_equal(
        iowa_fg.performance_clients.call_args[0][1], validation_label)

    iowa_fg._aggregator.set_ponderation.assert_called_once_with(
        iowa_fg.performance_clients.return_value, iowa_fg._dynamic,
        iowa_fg._a, iowa_fg._b, iowa_fg._c, iowa_fg._y_b, iowa_fg._k)

    iowa_fg.aggregate_weights.assert_called_once()

    iowa_fg.evaluate_global_model.assert_called_once()
    assert len(iowa_fg.evaluate_global_model.call_args[0]) == 2
    # Inspect the global-model mock itself; checking evaluate_clients again
    # here would leave the global model's data argument unverified.
    np.testing.assert_array_equal(
        iowa_fg.evaluate_global_model.call_args[0][0], test_data)
    np.testing.assert_array_equal(
        iowa_fg.evaluate_global_model.call_args[0][1], test_label)
def test_evaluate_global_model():
    """Check that evaluating the global model forwards the test set.

    The global model is a ``Mock``; its ``evaluate`` must be called exactly
    once with the same data and labels given to ``evaluate_global_model``.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, test_data, test_labels = \
        distribution.get_federated_data(num_nodes)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)

    # Give the mocked evaluate a concrete return value.
    fake_metrics = np.random.randint(0, 10, 40)
    fdg._model.evaluate.return_value = fake_metrics

    fdg.evaluate_global_model(test_data, test_labels)

    fdg._model.evaluate.assert_called_once_with(test_data, test_labels)
def test_federated_government_private_data():
    """Check how a government built with ``UnprotectedAccess`` is wired.

    Every node model and the global model must be instances of the builder,
    and the stored aggregator must carry the same ``id`` as the one passed
    to the constructor.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, _test_data, _test_labels = \
        distribution.get_federated_data(3)

    government = TestFederatedGovernment(model_builder, federated_data,
                                         aggregator, UnprotectedAccess())

    for data_node in government._federated_data:
        assert isinstance(data_node._model, model_builder)

    assert isinstance(government.global_model, model_builder)
    assert aggregator.id == government._aggregator.id
def test_deploy_central_model():
    """Check that deploying the central model updates every node once.

    After ``deploy_central_model`` each node's mocked model must have had
    ``set_model_params`` called exactly once.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, _test_data, _test_labels = \
        distribution.get_federated_data(num_nodes)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)

    # Parameters the mocked global model reports.
    central_params = np.random.rand(30)
    fdg._model.get_model_params.return_value = central_params

    fdg.deploy_central_model()

    for data_node in fdg._federated_data:
        data_node._model.set_model_params.assert_called_once()
def test_aggregate_weights():
    """Check that the aggregated weights are set on the global model.

    The mocked aggregator returns a fixed array; ``aggregate_weights`` must
    pass that array to the global model's ``set_model_params`` exactly once.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, _test_data, _test_labels = \
        distribution.get_federated_data(num_nodes)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)

    # Value the mocked aggregator hands back.
    aggregated = np.random.rand(64, 32)
    fdg._aggregator.aggregate_weights.return_value = aggregated

    fdg.aggregate_weights()

    fdg._model.set_model_params.assert_called_once_with(aggregated)
def test_train_all_clients():
    """Check that every node model is trained once on that node's data.

    After ``train_all_clients`` each node's mocked model must have had
    ``train`` called exactly once with the data and label returned by the
    node's ``query``.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, _test_data, _test_labels = \
        distribution.get_federated_data(num_nodes)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)
    fdg.train_all_clients()

    # Presumably needed so that query() below exposes the raw labeled data.
    fdg._federated_data.configure_data_access(UnprotectedAccess())

    for data_node in fdg._federated_data:
        node_content = data_node.query()
        data_node._model.train.assert_called_once_with(node_content.data,
                                                       node_content.label)
def test_performance_clients():
    """Check that client performances are collected in node order.

    Each node's ``performance`` is mocked to return the node's position, so
    the collected result must equal ``np.arange`` over the node count, and
    every mock must have been called once with the validation data.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, _test_data, _test_labels = \
        distribution.get_federated_data(num_nodes)

    iowa_fg = IowaFederatedGovernment(model_builder, federated_data)

    # Node at position p reports performance p.
    for position, data_node in enumerate(iowa_fg._federated_data):
        data_node.performance = Mock()
        data_node.performance.return_value = position

    expected = np.arange(iowa_fg._federated_data.num_nodes())

    data_val = np.random.rand(25).reshape((5, 5))
    labels_val = np.random.randint(0, 2, 5)

    collected = iowa_fg.performance_clients(data_val, labels_val)

    assert np.array_equal(collected, expected)
    for data_node in iowa_fg._federated_data:
        data_node.performance.assert_called_once_with(data_val, labels_val)
def test_evaluate_clients_local():
    """Check that evaluating the clients forwards the test set to each node.

    Each node's ``evaluate`` is mocked to return a pair of arrays; after
    ``evaluate_clients`` every mock must have been called exactly once with
    the test data and labels.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    num_nodes = 3
    federated_data, test_data, test_labels = \
        distribution.get_federated_data(num_nodes)

    fdg = FederatedGovernment(model_builder, federated_data, aggregator)

    # Each node reports two result arrays.
    for data_node in fdg._federated_data:
        data_node.evaluate = Mock()
        data_node.evaluate.return_value = [
            np.random.randint(0, 10, 40) for _ in range(2)
        ]

    fdg.evaluate_clients(test_data, test_labels)

    for data_node in fdg._federated_data:
        data_node.evaluate.assert_called_once_with(test_data, test_labels)