Ejemplo n.º 1
0
 def user_id_to_relation_block(self, user_ids):
     """Build a ``RelationBlock`` describing the given user ids.

     The ids are de-duplicated with ``np.unique`` so that features are
     encoded once per distinct user; the inverse index maps every entry of
     ``user_ids`` back to its row among the distinct users.

     Args:
         user_ids: Array-like of user ids, one entry per sample.

     Returns:
         A ``RelationBlock`` built from the inverse index and the encoded
         feature matrix of the distinct users.
     """
     distinct_ids, inverse = np.unique(user_ids, return_inverse=True)
     # Look up one row per distinct user; reset_index() turns the index back
     # into a regular column before encoding.
     # NOTE(review): presumably self.user_unique is indexed by user id —
     # confirm against the constructor.
     user_rows = self.user_unique.reindex(distinct_ids).reset_index()
     features = self.user_encoders.encode_df(
         user_rows, right_tables=[self.df_train]
     )
     return RelationBlock(inverse, features)
Ejemplo n.º 2
0
        X_date_train, X_date_test = (None, None)

    # setup grouping
    feature_group_sizes.extend(user_encoder.encoder_shapes)
    feature_group_sizes.extend(movie_encoder.encoder_shapes)

    # Create RelationBlock.
    train_blocks: List[RelationBlock] = []
    test_blocks: List[RelationBlock] = []
    for source, target in [(df_train, train_blocks), (df_test, test_blocks)]:
        unique_users, user_map = np.unique(source.user_id, return_inverse=True)
        target.append(
            RelationBlock(
                user_map,
                user_encoder.encode_df(
                    pd.DataFrame(dict(user_id=unique_users)),
                    [implicit_data_source],
                ),
            ))
        unique_movies, movie_map = np.unique(source.movie_id,
                                             return_inverse=True)
        target.append(
            RelationBlock(
                movie_map,
                movie_encoder.encode_df(
                    pd.DataFrame(dict(movie_id=unique_movies)),
                    [implicit_data_source],
                ),
            ))

    trace_path = "rmse_variational_fold_{0}.csv".format(FOLD_INDEX)
Ejemplo n.º 3
0
                row.append(index)
                col.append(user_to_internal[uid])
        return sps.hstack([
            X,
            sps.csr_matrix(
                (data, (row, col)),
                shape=(len(movie_ids), len(user_to_internal)),
            ),
        ])

    # Create RelationBlock.
    train_blocks: List[RelationBlock] = []
    test_blocks: List[RelationBlock] = []
    for source, target in [(df_train, train_blocks), (df_test, test_blocks)]:
        unique_users, user_map = np.unique(source.user_id, return_inverse=True)
        target.append(RelationBlock(user_map, augment_user_id(unique_users)))
        unique_movies, movie_map = np.unique(source.movie_id,
                                             return_inverse=True)
        target.append(RelationBlock(movie_map,
                                    augment_movie_id(unique_movies)))

    trace_path = "rmse_{0}_fold_{1}.csv".format(ALGORITHM, FOLD_INDEX)

    callback: LibFMLikeCallbackBase
    fm: Union[MyFMGibbsRegressor, MyFMOrderedProbit]
    if ALGORITHM == "regression":
        fm = myfm.MyFMRegressor(rank=DIMENSION)
        callback = RegressionCallback(
            n_iter=ITERATION,
            X_test=X_date_test,
            y_test=df_test.rating.values,
Ejemplo n.º 4
0
 def movie_id_to_relation_block(self, movie_ids):
     """Build a ``RelationBlock`` describing the given movie ids.

     The ids are de-duplicated with ``np.unique`` so that features are
     encoded once per distinct movie; the inverse index maps every entry of
     ``movie_ids`` back to its row among the distinct movies.

     Args:
         movie_ids: Array-like of movie ids, one entry per sample.

     Returns:
         A ``RelationBlock`` built from the inverse index and the encoded
         feature matrix of the distinct movies.
     """
     distinct_ids, inverse = np.unique(movie_ids, return_inverse=True)
     # Look up one row per distinct movie; reset_index() turns the index
     # back into a regular column before encoding.
     # NOTE(review): presumably self.movie_unique is indexed by movie id —
     # confirm against the constructor.
     movie_rows = self.movie_unique.reindex(distinct_ids).reset_index()
     features = self.movie_encoders.encode_df(movie_rows, [self.df_train])
     return RelationBlock(inverse, features)
Ejemplo n.º 5
0
            if not use_ii:
                continue
            watched_users = movie_vs_watched.get(movie_id, [])
            normalizer = 1 / max(len(watched_users), 1) ** 0.5
            for uid in watched_users:
                X[index, user_to_internal[uid] +
                    len(movie_to_internal)] = normalizer
        return X.tocsr()

    # Create RelationBlock.
    train_blocks = []
    test_blocks = []
    for source, target in [(df_train, train_blocks), (df_test, test_blocks)]:
        unique_users, user_map = np.unique(source.user_id, return_inverse=True)
        target.append(
            RelationBlock(user_map, augment_user_id(unique_users))
        )
        unique_movies, movie_map = np.unique(
            source.movie_id, return_inverse=True)
        target.append(
            RelationBlock(movie_map, augment_movie_id(unique_movies))
        )

    trace_path="rmse_{0}_fold_{1}.csv".format(ALGORITHM, FOLD_INDEX)
    if ALGORITHM == "regression":
        fm = myfm.MyFMRegressor(rank=DIMENSION)
        callback = RegressionCallback(
            ITERATION, X_date_test, df_test.rating.values, X_rel_test=test_blocks,
            clip_min=0.5, clip_max=5.0, trace_path=trace_path
        )
    else: