예제 #1
0
def test_make_data_federated_wrong_weights():
    """Federate the training data with weights that do not sum to one.

    The raw weights ``[0.5, 0.5, 0.5]`` are handed to ``make_data_federated``;
    the expected node sizes are then derived from those same weights rescaled
    so that they sum to one.
    """
    database = TestDataBase()
    database.load_data()
    data_distribution = IidDataDistribution(database)

    train_data, train_label = data_distribution._database.train

    num_nodes = 3
    percent = 60
    weights = [0.5, 0.5, 0.5]
    federated_data, federated_label = data_distribution.make_data_federated(
        train_data, train_label, percent, num_nodes, weights)

    # Rescale the weights to sum to one before computing the expected sizes.
    weight_total = sum(weights)
    weights = np.array([float(w) / weight_total for w in weights])
    data_distribution.get_federated_data(3)

    all_data = np.concatenate(federated_data)
    all_label = np.concatenate(federated_label)

    # Position in train_data of the first row equal to each federated sample.
    idx = [np.where((sample == train_data).all(axis=1))[0][0]
           for sample in all_data]

    expected_total = int(percent * train_data.shape[0] / 100)
    for node, weight in enumerate(weights):
        assert federated_data[node].shape[0] == int(weight * expected_total)

    assert all_data.shape[0] == expected_total
    assert num_nodes == len(federated_data) == len(federated_label)
    assert (np.sort(all_data.ravel()) == np.sort(train_data[idx, ].ravel())).all()
    assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all()
예제 #2
0
def test_make_data_federated():
    """Check ``make_data_federated`` on array data, by default and with replacement.

    The same size, node-count and content assertions are applied to the result
    of the default call and to the result of the call made with
    ``sampling="with_replacement"``.
    """
    database = TestDataBase()
    database.load_data()
    data_distribution = IidDataDistribution(database)

    train_data, train_label = data_distribution._database.train

    num_nodes = 3
    percent = 60
    weights = [0.5, 0.25, 0.25]

    def check_partition(federated_data, federated_label):
        # Shared assertions for one (data, label) federation result.
        all_data = np.concatenate(federated_data)
        all_label = np.concatenate(federated_label)

        # Position in train_data of the first row equal to each sample.
        idx = [np.where((sample == train_data).all(axis=1))[0][0]
               for sample in all_data]

        expected_total = int(percent * train_data.shape[0] / 100)
        for node, weight in enumerate(weights):
            assert federated_data[node].shape[0] == int(
                weight * expected_total)

        assert all_data.shape[0] == expected_total
        assert num_nodes == federated_data.shape[0] == federated_label.shape[0]
        assert (np.sort(all_data.ravel()) == np.sort(
            train_data[idx, ].ravel())).all()
        assert (np.sort(all_label, 0) == np.sort(train_label[idx], 0)).all()

    # Default sampling.
    default_data, default_label = data_distribution.make_data_federated(
        train_data, train_label, num_nodes, percent, weights)
    data_distribution.get_federated_data(3)
    check_partition(default_data, default_label)

    # Sampling with replacement.
    replaced_data, replaced_label = data_distribution.make_data_federated(
        train_data,
        train_label,
        num_nodes,
        percent,
        weights,
        sampling="with_replacement")
    check_partition(replaced_data, replaced_label)
    def __init__(self,
                 data_base_name_key,
                 iid=True,
                 num_nodes=20,
                 percent=100):
        """Set up a federated government over one of the ``ImagesDataBases``.

        Args:
            data_base_name_key: Name of a member of ``ImagesDataBases``.
            iid: If true the data is distributed with ``IidDataDistribution``,
                otherwise with ``NonIidDataDistribution``.
            num_nodes: Forwarded to ``get_federated_data``.
            percent: Forwarded to ``get_federated_data``.

        When the key is not a member name, a message listing the valid names
        is printed, ``self._test_data`` is set to ``None`` and the parent
        initializer is not called.
        """
        if data_base_name_key not in ImagesDataBases.__members__:
            print("The data base name is not included. Try with: " +
                  str(", ".join([e.name for e in ImagesDataBases])))
            # NOTE(review): self._test_labels is not assigned on this path.
            self._test_data = None
            return

        data_base_cls = ImagesDataBases.__members__[data_base_name_key].value
        data_base = data_base_cls()
        train_data, _, _, _ = data_base.load_data()

        distribution_cls = (IidDataDistribution
                            if iid else NonIidDataDistribution)
        distribution = distribution_cls(data_base)

        (federated_data, self._test_data,
         self._test_labels) = distribution.get_federated_data(
             num_nodes=num_nodes, percent=percent)

        # Reshape every node first, then normalize with the statistics of the
        # loaded training data.
        apply_federated_transformation(federated_data, Reshape())
        mean = np.mean(train_data.data)
        std = np.std(train_data.data)
        apply_federated_transformation(federated_data, Normalize(mean, std))

        super().__init__(self.model_builder, federated_data,
                         FedAvgAggregator())
def test_IowaFederatedGovernment():
    """Check that the constructor stores its arguments on the instance.

    Verifies the aggregator type, the type of the built model, the federated
    data, and the ``a``, ``b``, ``c``, ``y_b``, ``k`` and ``dynamic`` values.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, _, _ = distribution.get_federated_data(3)

    # Constructor keyword arguments; each is expected on the instance under
    # the same name with a leading underscore.
    params = {"a": 0, "b": 1, "c": 2, "y_b": 3, "k": 4}
    dynamic = True
    iowa_fg = IowaFederatedGovernment(model_builder,
                                      federated_data,
                                      model_params_access=None,
                                      dynamic=dynamic,
                                      **params)

    assert isinstance(iowa_fg._aggregator, IowaFederatedAggregator)
    assert isinstance(iowa_fg._model, model_builder)
    assert np.array_equal(iowa_fg._federated_data, federated_data)
    for name, value in params.items():
        assert getattr(iowa_fg, "_" + name) == value
    assert iowa_fg._dynamic == dynamic
def test_run_rounds_local_tests():
    """Check which steps ``run_rounds`` invokes for a single round.

    The federated data is passed through ``split_train_test`` first. Every
    step method is replaced by a mock, and each is expected to be called
    exactly once; the two evaluation steps must receive the test data and
    labels.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, test_data, test_labels = distribution.get_federated_data(3)

    split_train_test(federated_data)

    fdg = FederatedGovernment(model_builder, federated_data, Mock())

    # Replace every step of the round with a mock so only calls are recorded.
    step_names = ("deploy_central_model", "train_all_clients",
                  "evaluate_clients", "aggregate_weights",
                  "evaluate_global_model")
    for step_name in step_names:
        setattr(fdg, step_name, Mock())

    fdg.run_rounds(1, test_data, test_labels)

    fdg.deploy_central_model.assert_called_once()
    fdg.train_all_clients.assert_called_once()
    fdg.evaluate_clients.assert_called_once_with(test_data, test_labels)
    fdg.aggregate_weights.assert_called_once()
    fdg.evaluate_global_model.assert_called_once_with(test_data, test_labels)
    def __init__(self,
                 data_base_name_key,
                 iid=True,
                 num_nodes=20,
                 percent=100):
        """Set up a federated government over one of the ``ClusteringDataBases``.

        Args:
            data_base_name_key: Name of a member of ``ClusteringDataBases``.
            iid: If true the data is distributed with ``IidDataDistribution``,
                otherwise with ``NonIidDataDistribution``.
            num_nodes: Forwarded to ``get_federated_data``.
            percent: Forwarded to ``get_federated_data``.

        When the key is not a member name, a message listing the valid names
        is printed, ``self._test_data`` is set to ``None`` and the parent
        initializer is not called.
        """
        if data_base_name_key not in ClusteringDataBases.__members__:
            print("The data base name is not included. Try with: " +
                  str(", ".join([e.name for e in ClusteringDataBases])))
            # NOTE(review): self._test_labels, self._num_clusters and
            # self._num_features are not assigned on this path.
            self._test_data = None
            return

        data_base_cls = ClusteringDataBases.__members__[data_base_name_key].value
        data_base = data_base_cls()
        train_data, train_labels, _, _ = data_base.load_data()

        # Number of distinct training labels and of columns in the train data.
        self._num_clusters = len(np.unique(train_labels))
        self._num_features = train_data.shape[1]

        distribution_cls = (IidDataDistribution
                            if iid else NonIidDataDistribution)
        distribution = distribution_cls(data_base)

        (federated_data, self._test_data,
         self._test_labels) = distribution.get_federated_data(
             num_nodes=num_nodes, percent=percent)

        super().__init__(self.model_builder, federated_data,
                         ClusterFedAvgAggregator())
예제 #7
0
def test_make_data_federated_pandas():
    """Check ``make_data_federated`` on pandas data, by default and with replacement.

    The same size, node-count and frame-equality assertions are applied to the
    result of the default call and to the result of the call made with
    ``sampling="with_replacement"``.
    """
    database = TestDataBasePandas()
    database.load_data()
    data_distribution = IidDataDistribution(database)

    train_data, train_label = data_distribution._database.train

    num_nodes = 3
    percent = 60
    weights = [0.5, 0.25, 0.25]

    def check_partition(federated_data, federated_label):
        # Shared assertions for one (data, label) federation result.
        all_data = pd.concat(federated_data)
        all_label = pd.concat(federated_label)

        expected_total = int(percent * train_data.shape[0] / 100)
        for node, weight in enumerate(weights):
            assert federated_data[node].shape[0] == int(weight * expected_total)

        assert all_data.shape[0] == expected_total
        assert num_nodes == len(federated_data) == len(federated_label)

        # Compare against the training rows selected by the index values of
        # the concatenated federated data.
        picked = all_data.index.values
        pd.testing.assert_frame_equal(all_data, train_data.iloc[picked])
        pd.testing.assert_frame_equal(all_label, train_label.iloc[picked])

    # Default sampling.
    default_data, default_label = data_distribution.make_data_federated(
        train_data, train_label, percent, num_nodes, weights)
    data_distribution.get_federated_data(3)
    check_partition(default_data, default_label)

    # Sampling with replacement.
    replaced_data, replaced_label = data_distribution.make_data_federated(
        train_data,
        train_label,
        percent,
        num_nodes,
        weights,
        sampling="with_replacement")
    check_partition(replaced_data, replaced_label)
def test_run_rounds():
    """Check the calls ``IowaFederatedGovernment.run_rounds`` makes in one round.

    Every step of the round is replaced by a mock. After running one round the
    test rebuilds the expected validation/test split and verifies that each
    step was called once with the expected arguments.
    """
    np.random.seed(123)
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    db = IidDataDistribution(database)

    num_nodes = 3
    federated_data, test_data, test_label = db.get_federated_data(num_nodes)

    iowa_fg = IowaFederatedGovernment(model_builder, federated_data)

    n = 1

    iowa_fg.deploy_central_model = Mock()
    iowa_fg.train_all_clients = Mock()
    iowa_fg.evaluate_clients = Mock()
    iowa_fg.performance_clients = Mock()
    iowa_fg.performance_clients.return_value = 0
    iowa_fg._aggregator.set_ponderation = Mock()
    iowa_fg.aggregate_weights = Mock()
    iowa_fg.evaluate_global_model = Mock()

    iowa_fg.run_rounds(n, test_data, test_label)

    # Replicate the test and validation data.
    # NOTE(review): the permutation is hard-coded; presumably it is what
    # run_rounds draws under the seed set above -- confirm.
    randomize = [0, 9, 3, 4, 6, 8, 2, 1, 5, 7]
    test_data = test_data[randomize, ]
    test_label = test_label[randomize]
    # The first 15% of the permuted samples are validation, the rest test.
    split = int(0.15 * len(test_label))
    validation_data = test_data[:split, ]
    validation_label = test_label[:split]
    test_data = test_data[split:, ]
    test_label = test_label[split:]

    iowa_fg.deploy_central_model.assert_called_once()
    iowa_fg.train_all_clients.assert_called_once()

    iowa_fg.evaluate_clients.assert_called_once()
    assert len(iowa_fg.evaluate_clients.call_args[0]) == 2
    np.testing.assert_array_equal(iowa_fg.evaluate_clients.call_args[0][0],
                                  test_data)
    np.testing.assert_array_equal(iowa_fg.evaluate_clients.call_args[0][1],
                                  test_label)

    iowa_fg.performance_clients.assert_called_once()
    assert len(iowa_fg.performance_clients.call_args[0]) == 2
    np.testing.assert_array_equal(iowa_fg.performance_clients.call_args[0][0],
                                  validation_data)
    np.testing.assert_array_equal(iowa_fg.performance_clients.call_args[0][1],
                                  validation_label)

    iowa_fg._aggregator.set_ponderation.assert_called_once_with(
        iowa_fg.performance_clients.return_value, iowa_fg._dynamic, iowa_fg._a,
        iowa_fg._b, iowa_fg._c, iowa_fg._y_b, iowa_fg._k)
    iowa_fg.aggregate_weights.assert_called_once()

    iowa_fg.evaluate_global_model.assert_called_once()
    assert len(iowa_fg.evaluate_global_model.call_args[0]) == 2
    # Inspect evaluate_global_model's own arguments here (the previous version
    # re-checked evaluate_clients, leaving this argument unverified).
    np.testing.assert_array_equal(
        iowa_fg.evaluate_global_model.call_args[0][0], test_data)
    np.testing.assert_array_equal(
        iowa_fg.evaluate_global_model.call_args[0][1], test_label)
def test_evaluate_global_model():
    """Check that ``evaluate_global_model`` evaluates the global model once.

    The model's ``evaluate`` is expected to be called exactly once, with the
    test data and labels that were passed in.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, test_data, test_labels = distribution.get_federated_data(3)

    government = FederatedGovernment(model_builder, federated_data, Mock())
    global_model = government._model
    global_model.evaluate.return_value = np.random.randint(0, 10, 40)

    government.evaluate_global_model(test_data, test_labels)

    global_model.evaluate.assert_called_once_with(test_data, test_labels)
def test_federated_government_private_data():
    """Check the models and aggregator held by a freshly built government.

    Each node's ``_model`` and the government's ``global_model`` must be
    instances of the model builder, and the stored aggregator must carry the
    same ``id`` as the one passed in.
    """
    model_builder = Mock
    aggregator = Mock()
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)
    federated_data, _, _ = distribution.get_federated_data(3)

    government = TestFederatedGovernment(model_builder, federated_data,
                                         aggregator, UnprotectedAccess())

    for data_node in government._federated_data:
        assert isinstance(data_node._model, model_builder)

    assert isinstance(government.global_model, model_builder)
    assert aggregator.id == government._aggregator.id
def test_deploy_central_model():
    """Check that ``deploy_central_model`` sets the parameters of every node.

    After deployment, each node's model must have had ``set_model_params``
    called exactly once.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, _, _ = distribution.get_federated_data(3)

    government = FederatedGovernment(model_builder, federated_data, Mock())
    # Parameters the mocked global model reports when asked for them.
    government._model.get_model_params.return_value = np.random.rand(30)

    government.deploy_central_model()

    for data_node in government._federated_data:
        data_node._model.set_model_params.assert_called_once()
def test_aggregate_weights():
    """Check that ``aggregate_weights`` stores the aggregated result in the model.

    The mocked aggregator returns a fixed array; the global model must receive
    that same array through ``set_model_params`` exactly once.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, _, _ = distribution.get_federated_data(3)

    government = FederatedGovernment(model_builder, federated_data, Mock())

    aggregated = np.random.rand(64, 32)
    government._aggregator.aggregate_weights.return_value = aggregated

    government.aggregate_weights()

    government._model.set_model_params.assert_called_once_with(aggregated)
def test_train_all_clients():
    """Check that ``train_all_clients`` trains each node's model on its own data.

    After training, unprotected access is configured so each node can be
    queried, and each node's model must have been trained exactly once with
    the queried data and labels.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, _, _ = distribution.get_federated_data(3)

    government = FederatedGovernment(model_builder, federated_data, Mock())

    government.train_all_clients()

    nodes = government._federated_data
    nodes.configure_data_access(UnprotectedAccess())
    for data_node in nodes:
        node_content = data_node.query()
        data_node._model.train.assert_called_once_with(node_content.data,
                                                       node_content.label)
def test_performance_clients():
    """Check that ``performance_clients`` collects each node's performance.

    Node ``i`` is mocked to report performance ``i``, so the result must equal
    ``0 .. num_nodes - 1``, and every node must have been queried once with
    the validation data and labels.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, _, _ = distribution.get_federated_data(3)

    iowa_fg = IowaFederatedGovernment(model_builder, federated_data)
    nodes = iowa_fg._federated_data
    for position, data_node in enumerate(nodes):
        data_node.performance = Mock()
        data_node.performance.return_value = position
    expected = np.arange(nodes.num_nodes())

    data_val = np.random.rand(25).reshape((5, 5))
    labels_val = np.random.randint(0, 2, 5)
    performance = iowa_fg.performance_clients(data_val, labels_val)

    assert np.array_equal(performance, expected)
    for data_node in nodes:
        data_node.performance.assert_called_once_with(data_val, labels_val)
def test_evaluate_clients_local():
    """Check that ``evaluate_clients`` evaluates every node once.

    Each node's ``evaluate`` is mocked to return a pair of random integer
    arrays and must be called exactly once with the test data and labels.
    """
    model_builder = Mock
    database = TestDataBase()
    database.load_data()
    distribution = IidDataDistribution(database)

    federated_data, test_data, test_labels = distribution.get_federated_data(3)

    government = FederatedGovernment(model_builder, federated_data, Mock())
    nodes = government._federated_data

    for data_node in nodes:
        data_node.evaluate = Mock()
        data_node.evaluate.return_value = [
            np.random.randint(0, 10, 40),
            np.random.randint(0, 10, 40)
        ]

    government.evaluate_clients(test_data, test_labels)

    for data_node in nodes:
        data_node.evaluate.assert_called_once_with(test_data, test_labels)