예제 #1
0
    # Build two parallel lists of relation blocks, one for the training frame
    # and one for the test frame. Each list receives a user block followed by
    # a movie block.
    train_blocks: List[RelationBlock] = []
    test_blocks: List[RelationBlock] = []
    for source, target in [(df_train, train_blocks), (df_test, test_blocks)]:
        # np.unique(..., return_inverse=True) returns the sorted distinct ids
        # and, for every row of `source`, the index of its id in that array.
        unique_users, user_map = np.unique(source.user_id, return_inverse=True)
        # The per-row index is paired with whatever augment_user_id() builds
        # for the distinct users (presumably one feature row per unique id --
        # TODO confirm against augment_user_id).
        target.append(RelationBlock(user_map, augment_user_id(unique_users)))
        # Same construction for movies.
        unique_movies, movie_map = np.unique(source.movie_id,
                                             return_inverse=True)
        target.append(RelationBlock(movie_map,
                                    augment_movie_id(unique_movies)))

    # File name passed to the callbacks below as `trace_path`; it encodes the
    # algorithm name and the fold index.
    trace_path = "rmse_{0}_fold_{1}.csv".format(ALGORITHM, FOLD_INDEX)

    # Declared up front because the concrete classes are chosen in the
    # ALGORITHM branch that follows.
    callback: LibFMLikeCallbackBase
    fm: Union[MyFMGibbsRegressor, MyFMOrderedProbit]
    if ALGORITHM == "regression":
        fm = myfm.MyFMRegressor(rank=DIMENSION)
        callback = RegressionCallback(
            n_iter=ITERATION,
            X_test=X_date_test,
            y_test=df_test.rating.values,
            X_rel_test=test_blocks,
            clip_min=df_train.rating.min(),
            clip_max=df_train.rating.max(),
            trace_path=trace_path,
        )
    else:
        fm = myfm.MyFMOrderedProbit(rank=DIMENSION)
        callback = OrderedProbitCallback(
            n_iter=ITERATION,
            X_test=X_date_test,
            y_test=df_test.rating.values,
예제 #2
0
    X_movie[i, i] = 1

# For every movie with at least one watcher, set the columns of the users who
# watched it. User columns are addressed at an offset equal to the number of
# movies, and each entry is scaled by 1 / sqrt(number of watchers).
n_movies = len(movie_to_internal)  # loop-invariant column offset
for mid, watched in movie_vs_watched.items():
    if not watched:
        continue
    m_iid = movie_to_internal[mid]
    u_iids = [n_movies + user_to_internal[uid] for uid in watched]
    # `watched` is non-empty here, so no guard against division by zero is
    # needed.
    X_movie[m_iid, u_iids] = 1 / len(watched)**0.5

X_movie = X_movie.tocsr()

# One relation block per entity type: the first argument gives, for each
# training row, the row of the feature matrix (second argument) to use.
rb_1 = RelationBlock(df_train.user_id.map(user_to_internal).values, X_user)
rb_2 = RelationBlock(df_train.movie_id.map(movie_to_internal).values, X_movie)

fmr = myfm.MyFMRegressor(rank=12)

# No main design matrix is supplied (None); all features come from the
# relation blocks.
fmr.fit(None,
        df_train.rating.values,
        X_rel=[rb_1, rb_2],
        n_iter=200,
        n_kept_samples=100)
print(fmr.fms_)

# Zero-column main matrix handed to predict() alongside the relation blocks.
# Its row count is taken from the training frame rather than hard-coded, so
# the script keeps working when the training split has a different size.
main = sps.csr_matrix((df_train.shape[0], 0), dtype=np.float64)

# Root-mean-squared error of the fitted model on the training ratings.
print(((fmr.predict(main, [rb_1, rb_2]) -
        df_train.rating.values)**2).mean()**0.5)

import pickle

# Persist the fitted model. NOTE: only unpickle 'test.pkl' from a trusted
# source -- pickle.load executes arbitrary code.
with open('test.pkl', 'wb') as ofs:
    pickle.dump(fmr, ofs)