def test_save_load(mode):
    assert mode in ['sparse', 'dense']
    dataset = ToyRandomDataset()
    dataset._generate_many(100)

    all_ratings1 = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=VariableIndexLayer
        if mode == 'dense' else SparseVariableIndexLayer,
    )

    # creating models
    models1 = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings1)
        for user in dataset.users
    ]

    def load_data_to(models):
        for r in dataset.ratings:
            u_idx = dataset.users.index(r["user"])
            ratings_as_vector = np.array(
                [r["ratings"][k] for k in dataset.fields]) / 100.0
            models[u_idx].register_preference(
                o1=r["o1"],
                o2=r["o2"],
                p1_vs_p2=ratings_as_vector,
                weights=np.ones(len(ratings_as_vector)),
            )

    load_data_to(models1)
    call_on_dataset_end(models1)

    aggregator1 = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        hypers={
            "lambda_": 1.0,
            "mu": 1.0,
            "C": 1.0,
            "default_score_value": 1.0
        },
        models=models1,
        loss_fcn=loss_fcn_dense if mode == 'dense' else loss_fcn_sparse,
    )
    aggregator1.fit(epochs=100)

    all_ratings2 = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=VariableIndexLayer
        if mode == 'dense' else SparseVariableIndexLayer,
    )

    # creating models
    models2 = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings2)
        for user in dataset.users
    ]

    load_data_to(models2)
    call_on_dataset_end(models2)

    aggregator2 = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        hypers={
            "lambda_": 1.0,
            "mu": 1.0,
            "C": 1.0,
            "default_score_value": 1.0
        },
        loss_fcn=loss_fcn_dense if mode == 'dense' else loss_fcn_sparse,
        models=models2,
    )

    def is_close():
        out1 = aggregator1(dataset.objects)
        out2 = aggregator2(dataset.objects)

        assert isinstance(out1, np.ndarray), type(out1)
        assert isinstance(out2, np.ndarray), type(out2)

        assert out1.shape == out2.shape, (out1.shape, out2.shape)

        out1[out1 == None] = np.nan  # noqa: E711
        out2[out2 == None] = np.nan  # noqa: E711
        out1 = np.array(out1, dtype=np.float32)
        out2 = np.array(out2, dtype=np.float32)

        assert out1.dtype == out2.dtype, (out1.dtype, out2.dtype)
        return np.allclose(out1, out2)

    assert not is_close(), "Outputs already the same"

    save_dir = "./test-" + str(uuid1()) + "/"
    os.mkdir(save_dir)
    aggregator1.save(save_dir)
    aggregator2.load(save_dir)
    assert is_close(), "Outputs differ"

    shutil.rmtree(save_dir)
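
# NOTE: `mode` is supplied by the caller; a minimal sketch of how these tests
# could be parameterized (assuming pytest drives them; the decorator below is
# illustrative and not part of the original file):
#
#     @pytest.mark.parametrize("mode", ["sparse", "dense"])
#     def test_save_load(mode): ...
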
def test_hardcoded_dataset(mode):
    assert mode in ['sparse', 'dense']
    dataset = ToyHardcodedDataset()
    dataset._generate_many(100)

    all_ratings = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=VariableIndexLayer
        if mode == 'dense' else SparseVariableIndexLayer,
    )

    # creating models
    models = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings)
        for user in dataset.users
    ]

    for r in dataset.ratings:
        u_idx = dataset.users.index(r["user"])
        ratings_as_vector = np.array([r["ratings"][k]
                                      for k in dataset.fields]) / 100.0
        models[u_idx].register_preference(
            o1=r["o1"],
            o2=r["o2"],
            p1_vs_p2=ratings_as_vector,
            weights=np.ones(len(ratings_as_vector)),
        )

    call_on_dataset_end(models)

    # aggregating models
    aggregator = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        models=models,
        loss_fcn=loss_fcn_dense if mode == 'dense' else loss_fcn_sparse,
        hypers={
            "C": 1.0,
            "mu": 1.0,
            "lambda_": 1.0,
            "default_score_value": 1.0,
            "sample_every": 100
        },
        batch_params=dict(
            sample_experts=5000,
            sample_ratings_per_expert=5000,
            sample_objects_per_expert=5000,
        ),
    )

    aggregator.fit(epochs=1000)

    result = aggregator.models[0](["trump_video"])[0]
    assert isinstance(result, np.ndarray), "Wrong output"

    result = aggregator(["trump_video"])[0]
    assert isinstance(result, np.ndarray), "Wrong output"

    aggregator.plot_loss()
    plt.savefig("_test_plot.png")

    def validate_order(dataset, aggregator):
        """Test that downvoted videos have smaller ratings."""
        for user_id, user in enumerate(dataset.users):
            got_scores = aggregator.models[user_id](dataset.objects)
            expect_scores = dataset.scores_dict[user]
            errors = 0
            for i, feature in enumerate(dataset.fields):
                for i1, o1 in enumerate(dataset.objects):
                    for i2, o2 in enumerate(dataset.objects):
                        if o1 == o2:
                            continue
                        delta1 = got_scores[i2][i] - got_scores[i1][i]
                        if (o1, o2) in expect_scores[feature]:
                            delta2 = expect_scores[feature][(o1, o2)]
                        else:
                            delta2 = 100 - expect_scores[feature][(o2, o1)]
                        delta2 = (delta2 - 50) / 50.0
                        if delta1 * delta2 <= 0:
                            print(
                                f"Invalid result: {user} {feature} {o1} {o2} got"
                                f" {got_scores[i1][i]} {got_scores[i2][i]} rating {delta2}"
                            )
                            errors += 1
                        else:
                            print("Valid result")
            assert not errors, "There were %s errors" % errors

    validate_order(dataset, aggregator)
def test_loss_computation():
    """Implementing the loss once again in numpy

    ...and checking that the tf version computes the same thing."""
    users = range(np.random.randint(1, 100))
    objects = range(np.random.randint(1, 1000))
    fields = range(np.random.randint(1, 100))

    # creating the table
    all_ratings = AllRatingsWithCommon(experts=users,
                                       objects=objects,
                                       output_features=fields,
                                       name="tst",
                                       var_init_cls=VariableIndexLayer)

    # setting a fixed value as the current model parameters
    ratings_val = np.random.randn(1 + len(users), len(objects), len(fields))
    all_ratings.layer.v.assign(ratings_val)

    # creating models
    models = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings)
        for user in users
    ]

    # random hyperparameters
    hypers = {
        "C": np.random.rand(),
        "mu": np.random.rand(),
        "lambda_": np.random.rand(),
        "default_score_value": 1.0,
    }

    # aggregating models
    aggregator = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        models=models, hypers=hypers, loss_fcn=loss_fcn_dense)

    # inputs to the loss function
    experts_rating, objects_rating_v1, objects_rating_v2, cmp, weights = (
        [],
        [],
        [],
        [],
        [],
    )
    experts_all, objects_all, num_ratings_all = [], [], []
    objects_common_to_1 = []

    # generating mock data
    n_ratings = np.random.randint(1, 500)
    n_all = np.random.randint(1, 500)
    for r in range(n_ratings):
        experts_rating.append(np.random.choice(users))
        objects_rating_v1.append(np.random.choice(objects))
        objects_rating_v2.append(np.random.choice(objects))
        cmp.append(np.random.randn(len(fields)))
        weights.append(np.random.rand(len(fields)))

    for v in range(n_all):
        experts_all.append(np.random.choice(users))
        objects_all.append(np.random.choice(objects))
        num_ratings_all.append(np.random.randint(1, 50))

    for v in range(n_all):
        objects_common_to_1.append(np.random.choice(objects))

    def np_loss_fcn(
        experts_rating,
        objects_rating_v1,
        objects_rating_v2,
        cmp,
        weights,
        experts_all,
        objects_all,
        num_ratings_all,
        objects_common_to_1,
    ):
        """Compute the loss using numpy, same as aggregator.loss_fcn."""
        result = {}

        # FIT LOSS CALCULATION
        loss_fit = 0.0
        loss_fit_cnt = 0
        for exp, v1, v2, c, wei in zip(experts_rating, objects_rating_v1,
                                       objects_rating_v2, cmp, weights):
            for f in range(len(fields)):
                thetav = ratings_val[exp, v1, f]
                thetaw = ratings_val[exp, v2, f]
                y = c[f]
                w = wei[f]
                elem = np.log(1 + np.exp(y * (thetav - thetaw))) * w
                loss_fit += elem
                loss_fit_cnt += 1
        result["loss_fit"] = loss_fit

        # LOSS M to COMMON computation
        loss_reg_common = 0.0
        loss_reg_common_cnt = 0
        for exp, v, n in zip(experts_all, objects_all, num_ratings_all):
            for f in range(len(fields)):
                theta = ratings_val[exp, v, f]
                s = ratings_val[-1, v, f]

                elem = n / (hypers["C"] + n) * np.abs(theta - s)

                loss_reg_common += elem
                loss_reg_common_cnt += 1

        result["loss_m_to_common"] = loss_reg_common * hypers["lambda_"]

        # LOSS COMMON to 1 COMPUTATION
        loss_reg_c1 = 0.0
        loss_reg_c1_cnt = 0

        for v in objects_common_to_1:
            for f in range(len(fields)):
                s = ratings_val[-1, v, f]

                elem = np.square(s - 1)

                loss_reg_c1 += elem
                loss_reg_c1_cnt += 1

        result["loss_common_to_1"] = loss_reg_c1 * hypers["mu"]

        # TOTAL LOSS COMPUTATION
        result["loss"] = (result["loss_fit"] + result["loss_m_to_common"] +
                          result["loss_common_to_1"])

        return result

    # computing the loss
    args = [
        experts_rating,
        objects_rating_v1,
        objects_rating_v2,
        cmp,
        weights,
        experts_all,
        objects_all,
        num_ratings_all,
        objects_common_to_1,
    ]
    args_names = [
        "experts_rating",
        "objects_rating_v1",
        "objects_rating_v2",
        "cmp",
        "weights",
        "experts_all",
        "objects_all",
        "num_ratings_all",
        "objects_common_to_1",
    ]
    args = [np.array(x) for x in args]
    args = [
        tf.constant(x, dtype=tf.float32)
        if x.dtype == np.float64 else tf.constant(x) for x in args
    ]
    ans_tf = aggregator.loss_fcn(**dict(zip(args_names, args)))
    ans_tf = {k: v.numpy() for k, v in ans_tf.items()}

    # computing the numpy version
    ans_np = np_loss_fcn(
        experts_rating,
        objects_rating_v1,
        objects_rating_v2,
        cmp,
        weights,
        experts_all,
        objects_all,
        num_ratings_all,
        objects_common_to_1,
    )

    # verifying that the results are the same
    assert ans_tf.keys() == ans_np.keys()
    for key in ans_tf.keys():
        assert np.allclose(ans_tf[key],
                           ans_np[key]), f"Wrong value for loss {key}"
        print(f"Correct value for loss {key}")
def create_aggregator(dataset,
                      mode=None,
                      with_weights=True,
                      with_cert=True):
    """Build a featureless aggregator on a toy dataset (test helper)."""
    assert mode in ["sparse", "dense"]

    var_init_cls = (VariableIndexLayer
                    if mode == "dense" else SparseVariableIndexLayer)
    loss_fcn = loss_fcn_dense if mode == "dense" else loss_fcn_sparse

    all_ratings = AllRatingsWithCommon(
        experts=dataset.users,
        objects=dataset.objects,
        output_features=dataset.fields,
        name="tst",
        var_init_cls=var_init_cls,
    )

    # creating models
    models = [
        FeaturelessPreferenceLearningModel(expert=user,
                                           all_ratings=all_ratings)
        for user in dataset.users
    ]

    for r in dataset.ratings:
        u_idx = dataset.users.index(r["user"])
        ratings_as_vector = (
            np.array([r["ratings"][k] for k in dataset.fields]) / 100.0)
        if with_weights:
            weights_as_vector = np.array(
                [r["weights"][k] for k in dataset.fields])
        else:
            weights_as_vector = np.ones(len(dataset.fields))

        models[u_idx].register_preference(
            o1=r["o1"],
            o2=r["o2"],
            p1_vs_p2=ratings_as_vector,
            weights=weights_as_vector,
        )

    call_on_dataset_end(models)

    # aggregating models
    aggregator = FeaturelessMedianPreferenceAverageRegularizationAggregator(
        models=models,
        loss_fcn=loss_fcn,
        optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
        hypers={
            "C": 1.0,
            "mu": 1.0,
            "lambda_": 1.0,
            "default_score_value": 1.0
        },
        batch_params=dict(
            sample_experts=5000,
            sample_ratings_per_expert=5000,
            sample_objects_per_expert=5000,
        ),
    )

    params = aggregator.all_ratings.layer.v
    params.assign(tf.zeros_like(params))

    if with_cert:
        aggregator.certification_status = [
            np.random.rand() > 0.5 for _ in range(len(dataset.users))
        ]

    return aggregator
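
# A minimal usage sketch for the helper above (illustrative; assumes one of the
# toy datasets used in these tests):
#
#     dataset = ToyRandomDataset()
#     dataset._generate_many(100)
#     aggregator = create_aggregator(dataset, mode="dense")
#     aggregator.fit(epochs=1)
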
class DatabasePreferenceLearnerFeatureless(DatabasePreferenceLearner):
    """Learn models from the database, save/restore."""

    def create_models(self):
        """Create learning models and the aggregator."""
        self.all_ratings = AllRatingsWithCommon(
            experts=self.users,
            objects=self.videos,
            output_features=self.features,
            name="prod",
        )

        print_memory(stage="DPLF:ratings_nodata_created")

        # creating models
        self.user_to_model = {
            user: FeaturelessPreferenceLearningModel(
                expert=user, all_ratings=self.all_ratings
            )
            for user in self.users
        }

        print_memory(stage="DPLF:models_created")

        # before creating the aggregator, filling models with data
        self.user_to_size = {
            user: self.fill_model_data(self.user_to_model[user], user)
            for user in tqdmem(self.users, desc="fill_data")
        }

        # virtual 'common' data
        fplm_common = FeaturelessPreferenceLearningModel(
            expert=AllRatingsWithCommon.COMMON_EXPERT, all_ratings=self.all_ratings
        )
        fplm_common.on_dataset_end()

        print_memory(stage="DPLF:data_filled")

        # resetting the model given the data
        self.all_ratings.reset_model()

        print_memory(stage="DPLF:model_reset_ok")

        # aggregating models
        self.aggregator = FeaturelessMedianPreferenceAverageRegularizationAggregator(
            models=[self.user_to_model[u] for u in self.users]
        )
        self.aggregator.certification_status = self.user_certified

        print_memory(stage="DPLF:aggregator_created")

    def visualize(self):
        """Plot model predictions and losses."""
        self.aggregator.plot_loss()
        self.save_figure()

    def predict_user(self, user, videos):
        # @todo: use vectorized operations
        assert isinstance(user, UserPreferences)
        model = self.user_to_model[user.id]
        result = list(model([v.video_id for v in videos]))

        for i, video in enumerate(videos):
            if not model.ratings_with_object(video.video_id):
                result[i] = None

        return result

    def predict_aggregated(self, videos):
        # @todo: use vectorized operations
        return self.aggregator([v.video_id for v in videos])

    def fit(self, **kwargs):
        """Fit on latest database records."""

        self.stats["dataset_size"] = self.user_to_size

        super(DatabasePreferenceLearnerFeatureless, self).fit(**kwargs)

    def fill_model_data(self, model, user):
        """Populate model data from db."""
        n = 0
        for dct in self.get_dataset(user=user):
            v1, v2, res, w = [
                dct[key] for key in ["video_1", "video_2", "cmp", "weights"]
            ]
            model.register_preference(v1, v2, res, w)
            n += 1
        model.on_dataset_end()
        return n
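
# A minimal usage sketch (illustrative; the constructor arguments come from the
# DatabasePreferenceLearner base class, which is not shown here):
#
#     learner = DatabasePreferenceLearnerFeatureless(...)
#     learner.create_models()   # ratings table, per-user models, aggregator
#     learner.fit()             # fit on the latest database records
#     scores = learner.predict_aggregated(videos)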