def test_make_data_federated(): data = TestDataBase() data.load_data() data_distribution = IidDataDistribution(data) train_data, train_label = data_distribution._database.train num_nodes = 3 percent = 60 # weights = np.full(num_nodes, 1/num_nodes) weights = [0.5, 0.25, 0.25] federated_data, federated_label = data_distribution.make_data_federated( train_data, train_label, num_nodes, percent, weights) data_distribution.get_federated_data(3) all_data = np.concatenate(federated_data) all_label = np.concatenate(federated_label) idx = [] for data in all_data: idx.append(np.where((data == train_data).all(axis=1))[0][0]) for i, weight in enumerate(weights): assert federated_data[i].shape[0] == int( weight * int(percent * train_data.shape[0] / 100)) assert all_data.shape[0] == int(percent * train_data.shape[0] / 100) assert num_nodes == federated_data.shape[0] == federated_label.shape[0] assert (np.sort(all_data.ravel()) == np.sort( train_data[idx, ].ravel())).all() assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all() #test make federated data with replacement federated_data, federated_label = data_distribution.make_data_federated( train_data, train_label, num_nodes, percent, weights, sampling="with_replacement") all_data = np.concatenate(federated_data) all_label = np.concatenate(federated_label) idx = [] for data in all_data: idx.append(np.where((data == train_data).all(axis=1))[0][0]) for i, weight in enumerate(weights): assert federated_data[i].shape[0] == int( weight * int(percent * train_data.shape[0] / 100)) assert all_data.shape[0] == int(percent * train_data.shape[0] / 100) assert num_nodes == federated_data.shape[0] == federated_label.shape[0] assert (np.sort(all_data.ravel()) == np.sort( train_data[idx, ].ravel())).all() assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all()
def test_make_data_federated_wrong_weights(): data = TestDataBase() data.load_data() data_distribution = IidDataDistribution(data) train_data, train_label = data_distribution._database.train num_nodes = 3 percent = 60 # weights = np.full(num_nodes, 1/num_nodes) weights = [0.5, 0.5, 0.5] federated_data, federated_label = data_distribution.make_data_federated(train_data, train_label, percent, num_nodes, weights) weights = np.array([float(i) / sum(weights) for i in weights]) data_distribution.get_federated_data(3) all_data = np.concatenate(federated_data) all_label = np.concatenate(federated_label) idx = [] for data in all_data: idx.append(np.where((data == train_data).all(axis=1))[0][0]) for i, weight in enumerate(weights): assert federated_data[i].shape[0] == int(weight * int(percent * train_data.shape[0] / 100)) assert all_data.shape[0] == int(percent * train_data.shape[0] / 100) assert num_nodes == len(federated_data) == len(federated_label) assert (np.sort(all_data.ravel()) == np.sort(train_data[idx, ].ravel())).all() assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all()
def test_make_data_federated_pandas(): data = TestDataBasePandas() data.load_data() data_distribution = IidDataDistribution(data) train_data, train_label = data_distribution._database.train num_nodes = 3 percent = 60 # weights = np.full(num_nodes, 1/num_nodes) weights = [0.5, 0.25, 0.25] federated_data, federated_label = data_distribution.make_data_federated(train_data, train_label, percent, num_nodes, weights) data_distribution.get_federated_data(3) all_data = pd.concat(federated_data) all_label = pd.concat(federated_label) for i, weight in enumerate(weights): assert federated_data[i].shape[0] == int(weight * int(percent * train_data.shape[0] / 100)) assert all_data.shape[0] == int(percent * train_data.shape[0] / 100) assert num_nodes == len(federated_data) == len(federated_label) pd.testing.assert_frame_equal(all_data, train_data.iloc[all_data.index.values]) pd.testing.assert_frame_equal(all_label, train_label.iloc[all_data.index.values]) #test make federated data with replacement federated_data, federated_label = data_distribution.make_data_federated(train_data, train_label, percent, num_nodes, weights, sampling="with_replacement") all_data = pd.concat(federated_data) all_label = pd.concat(federated_label) for i, weight in enumerate(weights): assert federated_data[i].shape[0] == int(weight * int(percent * train_data.shape[0] / 100)) assert all_data.shape[0] == int(percent * train_data.shape[0] / 100) assert num_nodes == len(federated_data) == len(federated_label) pd.testing.assert_frame_equal(all_data, train_data.iloc[all_data.index.values]) pd.testing.assert_frame_equal(all_label, train_label.iloc[all_data.index.values])