Example #1
def recommend_k_fastai(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        # Recover the user/item vocabularies the model was trained on; the
        # first entry of each fastai class list is the `#na#` placeholder.
        total_users, total_items = model.data.train_ds.x.classes.values()
        total_items = total_items[1:]
        total_users = total_users[1:]
        # Keep only test users the model has actually seen.
        test_users = test[DEFAULT_USER_COL].unique()
        test_users = np.intersect1d(test_users, total_users)
        # Build every candidate (user, item) pair ...
        users_items = cartesian_product(test_users, total_items)
        users_items = pd.DataFrame(
            users_items, columns=[DEFAULT_USER_COL, DEFAULT_ITEM_COL])
        # ... and drop the pairs already present in the training set.
        training_removed = pd.merge(
            users_items,
            train.astype(str),
            on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL],
            how="left",
        )
        training_removed = training_removed[
            training_removed[DEFAULT_RATING_COL].isna()][[
                DEFAULT_USER_COL, DEFAULT_ITEM_COL
            ]]
        # Score the remaining pairs and keep the top-k predictions per user.
        topk_scores = score(
            model,
            test_df=training_removed,
            user_col=DEFAULT_USER_COL,
            item_col=DEFAULT_ITEM_COL,
            prediction_col=DEFAULT_PREDICTION_COL,
            top_k=top_k,
        )
    return topk_scores, t
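
A minimal usage sketch, assuming `learner` was produced by the `train_fastai` helper in Example #2 and `train_df`/`test_df` are pandas splits (all three names are illustrative):

topk_scores, t_rec = recommend_k_fastai(learner, test_df, train_df, top_k=10)
print(f"Top-k scoring took {t_rec.interval:.2f}s")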
Example #2
def train_fastai(params, data):
    model = collab_learner(data,
                           n_factors=params["n_factors"],
                           y_range=params["y_range"],
                           wd=params["wd"])
    with Timer() as t:
        model.fit_one_cycle(cyc_len=params["epochs"], max_lr=params["max_lr"])
    return model, t
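
The hyper-parameter dictionary this helper expects could look as follows; the values are illustrative rather than tuned, and `data` is assumed to be a fastai `CollabDataBunch` built from the training split:

fastai_params = {
    "n_factors": 40,      # embedding dimension
    "y_range": [0, 5.5],  # clamp predictions to the rating scale
    "wd": 1e-1,           # weight decay
    "epochs": 5,
    "max_lr": 5e-3,
}
model, t_train = train_fastai(fastai_params, data)  # `data` is an assumption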
Example #3
def predict_fastai(model, test):
    with Timer() as t:
        preds = score(
            model,
            test_df=test,
            user_col=DEFAULT_USER_COL,
            item_col=DEFAULT_ITEM_COL,
            prediction_col=DEFAULT_PREDICTION_COL,
        )
    return preds, t
Example #4
def predict_svd(model, test):
    with Timer() as t:
        preds = predict(
            model,
            test,
            usercol=DEFAULT_USER_COL,
            itemcol=DEFAULT_ITEM_COL,
            predcol=DEFAULT_PREDICTION_COL,
        )
    return preds, t
Example #5
def recommend_k_lightgcn(model,
                         test,
                         train,
                         top_k=DEFAULT_K,
                         remove_seen=True):
    with Timer() as t:
        topk_scores = model.recommend_k_items(test,
                                              top_k=top_k,
                                              remove_seen=remove_seen)
    return topk_scores, t
Example #6
def recommend_k_cornac(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        topk_scores = predict_ranking(
            model,
            train,
            usercol=DEFAULT_USER_COL,
            itemcol=DEFAULT_ITEM_COL,
            predcol=DEFAULT_PREDICTION_COL,
            remove_seen=remove_seen,
        )
    return topk_scores, t
Example #7
def test_timer(t):
    t.start()
    assert t.running is True
    time.sleep(1)
    t.stop()
    assert t.running is False
    assert t.interval == pytest.approx(1, abs=TOL)
    with Timer() as t2:
        assert t2.running is True
        time.sleep(1)
    assert t2.interval == pytest.approx(1, abs=TOL)
    assert t2.running is False
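
All of these helpers lean on the same `Timer` class. A minimal sketch of the interface they assume (the real implementation lives in `recommenders.utils.timer`):

from time import perf_counter

class Timer:
    """Bare-bones stand-in: start/stop, a `running` flag, elapsed seconds
    in `interval`, and context-manager support, matching the usage above."""

    def __init__(self):
        self.running = False
        self.interval = 0.0

    def start(self):
        self._t0 = perf_counter()
        self.running = True

    def stop(self):
        self.interval = perf_counter() - self._t0
        self.running = False

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *exc_info):
        self.stop()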
Example #8
def recommend_k_ncf(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        # Score every (user, item) combination in the training vocabulary.
        users, items, preds = [], [], []
        item = list(train[DEFAULT_ITEM_COL].unique())
        for user in train[DEFAULT_USER_COL].unique():
            user = [user] * len(item)
            users.extend(user)
            items.extend(item)
            preds.extend(list(model.predict(user, item, is_list=True)))
        topk_scores = pd.DataFrame(
            data={
                DEFAULT_USER_COL: users,
                DEFAULT_ITEM_COL: items,
                DEFAULT_PREDICTION_COL: preds,
            })
        # Outer-joining with the training set tags seen pairs with a rating;
        # keeping only the null-rating rows removes them from the candidates.
        merged = pd.merge(train,
                          topk_scores,
                          on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL],
                          how="outer")
        topk_scores = merged[merged[DEFAULT_RATING_COL].isnull()].drop(
            DEFAULT_RATING_COL, axis=1)
    return topk_scores, t
Example #9
def recommend_k_als(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        # Get the cross join of all user-item pairs and score them.
        users = train.select(DEFAULT_USER_COL).distinct()
        items = train.select(DEFAULT_ITEM_COL).distinct()
        user_item = users.crossJoin(items)
        dfs_pred = model.transform(user_item)

        # Remove seen items
        dfs_pred_exclude_train = dfs_pred.alias("pred").join(
            train.alias("train"),
            (dfs_pred[DEFAULT_USER_COL] == train[DEFAULT_USER_COL])
            & (dfs_pred[DEFAULT_ITEM_COL] == train[DEFAULT_ITEM_COL]),
            how="outer",
        )
        topk_scores = dfs_pred_exclude_train.filter(
            dfs_pred_exclude_train["train." +
                                   DEFAULT_RATING_COL].isNull()).select(
                                       "pred." + DEFAULT_USER_COL,
                                       "pred." + DEFAULT_ITEM_COL,
                                       "pred." + DEFAULT_PREDICTION_COL,
                                   )
    return topk_scores, t
Example #10
def predict_als(model, test):
    with Timer() as t:
        preds = model.transform(test)
    return preds, t
Example #11
def train_als(params, data):
    symbol = ALS(**params)
    with Timer() as t:
        model = symbol.fit(data)
    return model, t
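
Putting the three Spark helpers together; `spark_train` and `spark_test` are assumed Spark DataFrames with the default user/item/rating columns, and the parameter values are placeholders:

from pyspark.ml.recommendation import ALS  # the class train_als instantiates

als_params = {
    "userCol": DEFAULT_USER_COL,
    "itemCol": DEFAULT_ITEM_COL,
    "ratingCol": DEFAULT_RATING_COL,
    "rank": 10,
    "maxIter": 15,
    "regParam": 0.05,
    "seed": 42,
}
model, t_train = train_als(als_params, spark_train)            # Example #11
preds, t_pred = predict_als(model, spark_test)                 # Example #10
topk, t_rec = recommend_k_als(model, spark_test, spark_train)  # Example #9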
Example #12
def train_ncf(params, data):
    model = NCF(n_users=data.n_users, n_items=data.n_items, **params)
    with Timer() as t:
        model.fit(data)
    return model, t
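
An illustrative hyper-parameter set for the NCF helper; the keys follow the `recommenders` NCF constructor, and `data` is assumed to be its dataset wrapper exposing `n_users` and `n_items`:

ncf_params = {
    "model_type": "NeuMF",
    "n_factors": 4,
    "layer_sizes": [16, 8, 4],
    "n_epochs": 15,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "seed": 42,
}
model, t_train = train_ncf(ncf_params, data)  # `data` is an assumption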
Example #13
def train_lightgcn(params, data):
    hparams = prepare_hparams(**params)
    model = LightGCN(hparams, data)
    with Timer() as t:
        model.fit()
    return model, t
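
A sketch of how the LightGCN helper is typically fed: `prepare_hparams` reads a YAML config and `ImplicitCF` wraps the pandas splits. The file path, parameter values, and `train_df`/`test_df` names are placeholders:

lightgcn_params = {
    "yaml_file": "lightgcn.yaml",  # hypothetical config path
    "n_layers": 3,
    "embed_size": 64,
    "epochs": 50,
    "learning_rate": 0.005,
    "top_k": DEFAULT_K,
}
data = ImplicitCF(train=train_df, test=test_df, seed=42)  # assumed wrapper
model, t_train = train_lightgcn(lightgcn_params, data)
topk, t_rec = recommend_k_lightgcn(model, test_df, train_df)  # Example #5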
Example #14
def train_sar(params, data):
    model = SARSingleNode(**params)
    model.set_index(data)
    with Timer() as t:
        model.fit(data)
    return model, t
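
An illustrative SAR configuration; these keys follow the `SARSingleNode` constructor in older `recommenders` releases and may differ in newer ones, and `train_df` is an assumed pandas split:

sar_params = {
    "col_user": DEFAULT_USER_COL,
    "col_item": DEFAULT_ITEM_COL,
    "col_rating": DEFAULT_RATING_COL,
    "col_timestamp": DEFAULT_TIMESTAMP_COL,  # from recommenders.utils.constants
    "similarity_type": "jaccard",
    "time_decay_coefficient": 30,
    "timedecay_formula": True,
}
model, t_train = train_sar(sar_params, train_df)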
Example #15
def train_bivae(params, data):
    model = cornac.models.BiVAECF(**params)
    with Timer() as t:
        model.fit(data)
    return model, t
Example #16
def train_svd(params, data):
    model = surprise.SVD(**params)
    with Timer() as t:
        model.fit(data)
    return model, t
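
An end-to-end sketch with Surprise; `train_df`/`test_df` are assumed pandas splits with user, item, and rating columns:

import surprise

reader = surprise.Reader(rating_scale=(1, 5))
train_set = surprise.Dataset.load_from_df(
    train_df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL, DEFAULT_RATING_COL]], reader
).build_full_trainset()

svd_params = {"n_factors": 200, "n_epochs": 30, "random_state": 42}
model, t_train = train_svd(svd_params, train_set)
preds, t_pred = predict_svd(model, test_df)  # Example #4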
Example #17
@pytest.fixture()
def t():
    # Supplies a fresh Timer instance to tests such as `test_timer` above.
    return Timer()
Example #18
def train_bpr(params, data):
    model = cornac.models.BPR(**params)
    with Timer() as t:
        model.fit(data)
    return model, t
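
Wiring the cornac helpers together; `cornac.data.Dataset.from_uir` builds a training set from (user, item, rating) triples, and `train_df`/`test_df` are assumed three-column pandas splits:

import cornac

train_set = cornac.data.Dataset.from_uir(
    train_df.itertuples(index=False), seed=42)
bpr_params = {"k": 200, "max_iter": 100, "learning_rate": 0.01, "seed": 42}
model, t_train = train_bpr(bpr_params, train_set)
topk, t_rec = recommend_k_cornac(model, test_df, train_df)  # Example #6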
Example #19
    def train(self, dataset, sampler, **kwargs):
        """
        High level function for model training as well as
        evaluation on the validation and test dataset
        """
        num_epochs = kwargs.get("num_epochs", 10)
        batch_size = kwargs.get("batch_size", 128)
        lr = kwargs.get("learning_rate", 0.001)
        val_epoch = kwargs.get("val_epoch", 5)

        num_steps = int(len(dataset.user_train) / batch_size)

        optimizer = tf.keras.optimizers.Adam(learning_rate=lr,
                                             beta_1=0.9,
                                             beta_2=0.999,
                                             epsilon=1e-7)

        loss_function = self.loss_function

        train_loss = tf.keras.metrics.Mean(name="train_loss")

        train_step_signature = [
            {
                "users":
                tf.TensorSpec(shape=(None, 1), dtype=tf.int64),
                "input_seq":
                tf.TensorSpec(shape=(None, self.seq_max_len), dtype=tf.int64),
                "positive":
                tf.TensorSpec(shape=(None, self.seq_max_len), dtype=tf.int64),
                "negative":
                tf.TensorSpec(shape=(None, self.seq_max_len), dtype=tf.int64),
            },
            tf.TensorSpec(shape=(None, 1), dtype=tf.int64),
        ]

        @tf.function(input_signature=train_step_signature)
        def train_step(inp, tar):
            with tf.GradientTape() as tape:
                pos_logits, neg_logits, loss_mask = self(inp, training=True)
                loss = loss_function(pos_logits, neg_logits, loss_mask)

            gradients = tape.gradient(loss, self.trainable_variables)
            optimizer.apply_gradients(zip(gradients, self.trainable_variables))

            train_loss(loss)
            return loss

        T = 0.0
        t0 = Timer()
        t0.start()

        for epoch in range(1, num_epochs + 1):

            step_loss = []
            train_loss.reset_states()
            for step in tqdm(range(num_steps),
                             total=num_steps,
                             ncols=70,
                             leave=False,
                             unit="b"):

                u, seq, pos, neg = sampler.next_batch()

                inputs, target = self.create_combined_dataset(u, seq, pos, neg)

                loss = train_step(inputs, target)
                step_loss.append(loss)

            if epoch % val_epoch == 0:
                t0.stop()
                t1 = t0.interval
                T += t1
                print("Evaluating...")
                t_test = self.evaluate(dataset)
                t_valid = self.evaluate_valid(dataset)
                print(
                    f"\nepoch: {epoch}, time: {T}, valid (NDCG@10: {t_valid[0]}, HR@10: {t_valid[1]})"
                )
                print(
                    f"epoch: {epoch}, time: {T},  test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})"
                )
                t0.start()

        t_test = self.evaluate(dataset)
        print(
            f"\nepoch: {epoch}, test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})"
        )

        return t_test
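
A hypothetical call; `model` is the sequential recommender this method belongs to, `dataset` is its data wrapper, and `sampler` is assumed to yield `(users, seqs, pos, neg)` batches from `next_batch()`:

ndcg_at_10, hr_at_10 = model.train(
    dataset,
    sampler,
    num_epochs=20,
    batch_size=128,
    learning_rate=1e-3,
    val_epoch=5,
)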