def test_save_load(mode):
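    """Check that saving one fitted aggregator and loading its state into a
    freshly built one makes their outputs match."""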
    assert mode in ['sparse', 'dense']
    dataset = ToyRandomDataset()
    dataset._generate_many(100)

    all_ratings1 = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=(VariableIndexLayer if mode == 'dense'
                      else SparseVariableIndexLayer),
    )

    # creating models
    models1 = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings1)
        for user in dataset.users
    ]

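    # helper: register every pairwise rating with the matching expert's
    # model, rescaling each rating from [0, 100] to [0, 1]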
    def load_data_to(models):
        for r in dataset.ratings:
            u_idx = dataset.users.index(r["user"])
            ratings_as_vector = np.array(
                [r["ratings"][k] for k in dataset.fields]) / 100.0
            models[u_idx].register_preference(
                o1=r["o1"],
                o2=r["o2"],
                p1_vs_p2=ratings_as_vector,
                weights=np.ones(len(ratings_as_vector)),
            )

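    # feed the data to the first set of models; call_on_dataset_end is
    # assumed to finalize preference registration before fitting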
    load_data_to(models1)
    call_on_dataset_end(models1)

    aggregator1 = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        hypers={
            "lambda_": 1.0,
            "mu": 1.0,
            "C": 1.0,
            "default_score_value": 1.0
        },
        models=models1,
        loss_fcn=loss_fcn_dense if mode == 'dense' else loss_fcn_sparse,
    )
    aggregator1.fit(epochs=100)

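    # build a second, identically configured but untrained pipeline to
    # receive the saved state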
    all_ratings2 = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=(VariableIndexLayer if mode == 'dense'
                      else SparseVariableIndexLayer),
    )

    # creating models
    models2 = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings2)
        for user in dataset.users
    ]

    load_data_to(models2)
    call_on_dataset_end(models2)

    aggregator2 = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        hypers={
            "lambda_": 1.0,
            "mu": 1.0,
            "C": 1.0,
            "default_score_value": 1.0
        },
        loss_fcn=loss_fcn_dense if mode == 'dense' else loss_fcn_sparse,
        models=models2,
    )

    def is_close():
        out1 = aggregator1(dataset.objects)
        out2 = aggregator2(dataset.objects)

        assert isinstance(out1, np.ndarray), type(out1)
        assert isinstance(out2, np.ndarray), type(out2)

        assert out1.shape == out2.shape, (out1.shape, out2.shape)

        # objects without a score may come back as None in an object array;
        # convert them to NaN so the outputs can be compared numerically
        out1[out1 == None] = np.nan  # noqa: E711
        out2[out2 == None] = np.nan  # noqa: E711
        out1 = np.array(out1, dtype=np.float32)
        out2 = np.array(out2, dtype=np.float32)

        assert out1.dtype == out2.dtype, (out1.dtype, out2.dtype)
        # treat NaNs (converted from None) in matching positions as equal
        return np.allclose(out1, out2, equal_nan=True)

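    # only aggregator1 has been fitted, so the outputs should differ for now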
    assert not is_close(), "Outputs already the same"

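    # round-trip: persist aggregator1's state, then load it into aggregator2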
    save_dir = "./test-" + str(uuid1()) + "/"
    os.mkdir(save_dir)
    aggregator1.save(save_dir)
    aggregator2.load(save_dir)
    assert is_close(), "Outputs differ"

    shutil.rmtree(save_dir)


def test_hardcoded_dataset(mode):
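    """Fit on a hand-crafted dataset, then check output types and that the
    learned scores respect the expected pairwise ordering."""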
    assert mode in ['sparse', 'dense']
    dataset = ToyHardcodedDataset()
    dataset._generate_many(100)

    all_ratings = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=(VariableIndexLayer if mode == 'dense'
                      else SparseVariableIndexLayer),
    )

    # creating models
    models = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings)
        for user in dataset.users
    ]

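    # register each rating with its expert's model, rescaled to [0, 1]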
    for r in dataset.ratings:
        u_idx = dataset.users.index(r["user"])
        ratings_as_vector = np.array([r["ratings"][k]
                                      for k in dataset.fields]) / 100.0
        models[u_idx].register_preference(
            o1=r["o1"],
            o2=r["o2"],
            p1_vs_p2=ratings_as_vector,
            weights=np.ones(len(ratings_as_vector)),
        )

    call_on_dataset_end(models)

    # aggregating models
    aggregator = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        models=models,
        loss_fcn=loss_fcn_dense if mode == 'dense' else loss_fcn_sparse,
        hypers={
            "C": 1.0,
            "mu": 1.0,
            "lambda_": 1.0,
            "default_score_value": 1.0,
            "sample_every": 100
        },
        batch_params=dict(
            sample_experts=5000,
            sample_ratings_per_expert=5000,
            sample_objects_per_expert=5000,
        ),
    )

    aggregator.fit(epochs=1000)

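    # both a single expert's model and the aggregate should return numpy
    # arrays when called on a list of object ids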
    result = aggregator.models[0](["trump_video"])[0]
    assert isinstance(result, np.ndarray), "Wrong output"

    result = aggregator(["trump_video"])[0]
    assert isinstance(result, np.ndarray), "Wrong output"

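    # smoke-test loss plotting and save the figure as a test artifact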
    aggregator.plot_loss()
    plt.savefig("_test_plot.png")

    def validate_order(dataset, aggregator):
        """Test that downvoted videos have smaller ratings."""
        for user_id, user in enumerate(dataset.users):
            got_scores = aggregator.models[user_id](dataset.objects)
            expect_scores = dataset.scores_dict[user]
            errors = 0
            for i, feature in enumerate(dataset.fields):
                for i1, o1 in enumerate(dataset.objects):
                    for i2, o2 in enumerate(dataset.objects):
                        if o1 == o2:
                            continue
                        # learned score difference: positive iff o2 scored
                        # higher than o1 on this feature
                        delta1 = got_scores[i2][i] - got_scores[i1][i]
                        # expected rating on a 0..100 scale; a missing pair is
                        # recovered from the reverse pair, mirrored around 50
                        if (o1, o2) in expect_scores[feature]:
                            delta2 = expect_scores[feature][(o1, o2)]
                        else:
                            delta2 = 100 - expect_scores[feature][(o2, o1)]
                        # map 0..100 to -1..1; its sign should match delta1's
                        delta2 = (delta2 - 50) / 50.0
                        # opposite signs (or a tie) violate the expected order
                        if delta1 * delta2 <= 0:
                            print(
                                f"Invalid result: {user} {feature} {o1} {o2} got"
                                f" {got_scores[i1][i]} {got_scores[i2][i]} rating {delta2}"
                            )
                            errors += 1
                        else:
                            print("Valid result")
            assert not errors, f"There were {errors} errors"

    validate_order(dataset, aggregator)