Example #1
    def test_titanic(self):
        """
        Test performance on titanic dataset
        """
        corruption_1 = Duplication(proportion=0.8, duplicated_partner_id=0)
        titanic_scenario_1 = Scenario(2, [0.4, 0.6],
                                      epoch_count=3,
                                      minibatch_count=1,
                                      dataset_name='titanic')
        titanic_scenario_2 = Scenario(3, [0.2, 0.2, 0.6],
                                      corruption_parameters=['not-corrupted', corruption_1, 'not-corrupted'],
                                      epoch_count=3,
                                      minibatch_count=1,
                                      dataset_name='titanic')
        exp = Experiment(experiment_name='end_to_end_titanic',
                         scenarios_list=[titanic_scenario_1],
                         nb_repeats=2,
                         is_save=True)
        exp.add_scenario(titanic_scenario_2)
        exp.run()
        titanic_scenario_1.run()
        assert np.min(titanic_scenario_1.mpl.history.score) > 0.65
        result = pd.read_csv(exp.experiment_path / 'results.csv')
        assert (result.groupby('scenario_index').mean().mpl_test_score > 0.65).all()

    def test_add_scenario(self, create_experiment):
        exp = create_experiment
        sc = Scenario(2, [0.5, 0.5], dataset='titanic')
        assert len(exp.scenarios_list) == 0, 'Scenario list should be empty when initialized'
        exp.add_scenario(sc)
        assert exp.scenarios_list[0] is sc, 'Failed to add a scenario'
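
These two methods are shown out of context: they belong to a test class and rely on imports that the excerpt omits. A minimal sketch of the surrounding module, assuming mplc-style import paths (the class name is chosen here for illustration):

import numpy as np
import pandas as pd

from mplc.corruption import Duplication   # assumed import path
from mplc.experiment import Experiment    # assumed import path
from mplc.scenario import Scenario        # assumed import path


class TestEndToEnd:
    # ... the two test methods above go here, indented as class methods ...
    pass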
def create_Scenario(request):
    dataset = request.param[0]()
    samples_split_option = request.param[1]
    corruption = request.param[2]
    params = {"dataset": dataset}
    params.update({
        "partners_count": 3,
        "amounts_per_partner": [0.3, 0.5, 0.2],
        "samples_split_option": samples_split_option,
        "corruption_parameters": corruption,
    })
    params.update({
        "contributivity_methods": ["Shapley values", "Independent scores"],
        "multi_partner_learning_approach": "fedavg",
        "aggregation": "uniform",
    })
    params.update({
        "gradient_updates_per_pass_count": 5,
        "epoch_count": 2,
        "minibatch_count": 2,
        "is_early_stopping": True,
    })
    params.update({"init_model_from": "random_initialization"})
    params.update({"is_quick_demo": False})

    # scenario_.dataset object is created inside the Scenario constructor
    scenario_ = Scenario(**params, scenario_id=0)

    scenario_.mpl = scenario_._multi_partner_learning_approach(
        scenario_, is_save_data=True)

    return scenario_
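
create_Scenario above is a pytest fixture: request.param is expected to carry a (dataset factory, samples split option, corruption parameters) tuple, supplied by a @pytest.fixture(params=...) decorator that this excerpt does not show. A hedged sketch of a consuming test; the test name and the attributes it checks are assumptions:

def test_scenario_setup(create_Scenario):
    # pytest resolves the fixture by argument name and injects the built Scenario
    scenario = create_Scenario
    assert scenario.partners_count == 3   # matches the params dict above
    assert scenario.mpl is not None       # the fixture attached the mpl object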
    def test_titanic(self):
        """
        Test performance on titanic dataset
        """

        titanic_scenario = Scenario(2, [0.4, 0.6], epoch_count=3, minibatch_count=1, dataset_name='titanic')
        titanic_scenario.run()

        assert np.min(titanic_scenario.mpl.history.score) > 0.65
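
The same check can be run outside pytest. A minimal standalone sketch, assuming the mplc import path for Scenario:

import numpy as np

from mplc.scenario import Scenario  # assumed import path

scenario = Scenario(2, [0.4, 0.6], epoch_count=3, minibatch_count=1, dataset_name='titanic')
scenario.run()  # trains on the titanic dataset split across two partners
print(np.min(scenario.mpl.history.score))  # the test above expects this to stay above 0.65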
def create_Scenario(request):
    dataset = request.param[0]()
    samples_split_option = request.param[1]

    params = {"dataset": dataset}
    params.update({
        "partners_count": 3,
        "amounts_per_partner": [0.2, 0.5, 0.3],
        "samples_split_option": samples_split_option,
        "corrupted_datasets": ["not_corrupted"] * 3,
    })
    params.update({
        "methods": ["Shapley values", "Independent scores"],
        "multi_partner_learning_approach": "fedavg",
        "aggregation": "uniform",
    })
    params.update({
        "gradient_updates_per_pass_count": 5,
        "epoch_count": 2,
        "minibatch_count": 2,
        "is_early_stopping": True,
    })
    params.update({"init_model_from": "random_initialization"})
    params.update({"is_quick_demo": False})

    full_experiment_name = "unit-test-pytest"
    experiment_path = (Path.cwd() / constants.EXPERIMENTS_FOLDER_NAME /
                       full_experiment_name)

    # scenario_.dataset object is created inside the Scenario constructor
    scenario_ = Scenario(**params,
                         experiment_path=experiment_path,
                         scenario_id=0,
                         repeats_count=1)

    scenario_.mpl = scenario_.multi_partner_learning_approach(
        scenario_, is_save_data=True)

    scenario_.instantiate_scenario_partners()
    # Split data according to scenario and then pre-process successively...
    # ... train data, early stopping validation data, test data
    if scenario_.samples_split_type == "basic":
        scenario_.split_data()
    elif scenario_.samples_split_type == "advanced":
        scenario_.split_data_advanced()
    scenario_.compute_batch_sizes()
    scenario_.data_corruption()

    return scenario_
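
Compared with the first create_Scenario fixture, this variant drives the scenario setup explicitly: it instantiates the partners, splits and pre-processes the data, computes batch sizes, and applies corruption before returning. A hedged sketch of a consuming test; the attribute name checked here is an assumption:

def test_scenario_data_is_split(create_Scenario):
    scenario = create_Scenario
    # after split_data() / split_data_advanced(), each partner should hold its share of samples
    assert len(scenario.partners_list) == 3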
def create_MultiPartnerLearning(create_all_datasets):
    data = create_all_datasets
    # Create partners_list (this is not a fixture):
    scenario = Scenario(3, [0.3, 0.3, 0.4], dataset=data)
    mpl = FederatedAverageLearning(
        scenario,
        epoch_count=2,
        minibatch_count=2,
        dataset=data,
        aggregation=UniformAggregator,
        is_early_stopping=True,
        is_save_data=False,
    )

    yield mpl
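
A hedged sketch of a test consuming this fixture; fit() and the history.score attribute are assumed from the other examples on this page:

def test_fedavg_runs(create_MultiPartnerLearning):
    mpl = create_MultiPartnerLearning
    mpl.fit()  # runs the 2 epochs x 2 minibatches configured in the fixture
    assert mpl.history.score is not None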
Example #7
    def test_all_mpl_approaches(self):
        """
        Test all the mpl approaches
        """

        exp = Experiment()
        mpl_approaches = multi_partner_learning.MULTI_PARTNER_LEARNING_APPROACHES.copy(
        )

        for approach in mpl_approaches:
            exp.add_scenario(
                Scenario(2, [0.25, 0.75],
                         epoch_count=2,
                         minibatch_count=2,
                         dataset_name='mnist',
                         dataset_proportion=0.1,
                         multi_partner_learning_approach=approach,
                         gradient_updates_per_pass_count=3))
        exp.run()

        df = exp.result
        assert len(df) == len(mpl_approaches)
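
The test above iterates over every registered learning approach and expects one result row per approach. A hedged sketch of inspecting that registry, assuming MULTI_PARTNER_LEARNING_APPROACHES maps approach names (such as 'fedavg', used elsewhere on this page) to their implementations:

from mplc import multi_partner_learning  # assumed import path

for approach_name in multi_partner_learning.MULTI_PARTNER_LEARNING_APPROACHES:
    print(approach_name)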