# Relation blocks for the train and test splits, filled by the loop below.
train_blocks: List[RelationBlock] = []
test_blocks: List[RelationBlock] = []
for source, target in [(df_train, train_blocks), (df_test, test_blocks)]:
    # np.unique(..., return_inverse=True) yields the distinct ids plus, for
    # every row of `source`, the position of that row's id among them.
    unique_users, user_map = np.unique(source.user_id, return_inverse=True)
    # presumably augment_user_id returns one feature row per unique user,
    # in the same order as `unique_users` -- TODO confirm against the helper
    target.append(RelationBlock(user_map, augment_user_id(unique_users)))
    unique_movies, movie_map = np.unique(source.movie_id, return_inverse=True)
    # same assumption for augment_movie_id -- TODO confirm
    target.append(RelationBlock(movie_map, augment_movie_id(unique_movies)))

# Trace file name is derived from the algorithm name and the fold index.
trace_path = "rmse_{0}_fold_{1}.csv".format(ALGORITHM, FOLD_INDEX)

# Declared up front so both branches below assign to the same annotated names.
callback: LibFMLikeCallbackBase
fm: Union[MyFMGibbsRegressor, MyFMOrderedProbit]
if ALGORITHM == "regression":
    fm = myfm.MyFMRegressor(rank=DIMENSION)
    callback = RegressionCallback(
        n_iter=ITERATION,
        X_test=X_date_test,
        y_test=df_test.rating.values,
        X_rel_test=test_blocks,
        # clip bounds are the smallest and largest rating in the training split
        clip_min=df_train.rating.min(),
        clip_max=df_train.rating.max(),
        trace_path=trace_path,
    )
else:
    # Any ALGORITHM value other than "regression" selects the ordered-probit model.
    fm = myfm.MyFMOrderedProbit(rank=DIMENSION)
    callback = OrderedProbitCallback(
        n_iter=ITERATION,
        X_test=X_date_test,
        y_test=df_test.rating.values,
# NOTE(review): `i` is not assigned anywhere in this block; this statement
# presumably belongs to a loop whose header sits above it -- confirm the
# indentation against that loop before merging.
X_movie[i, i] = 1

# For every movie that has at least one watcher, set the watchers' columns in
# that movie's row. User columns are offset by len(movie_to_internal), and
# each entry gets the weight 1 / sqrt(number of watchers).
for mid, watched in movie_vs_watched.items():
    if not watched:
        continue
    m_iid = movie_to_internal[mid]
    u_iids = [
        len(movie_to_internal) + user_to_internal[uid] for uid in watched
    ]
    X_movie[m_iid, u_iids] = 1 / max(1, len(watched)) ** 0.5
X_movie = X_movie.tocsr()

# One relation block per entity: each training row is mapped to the internal
# index of its user / movie, which selects a row of X_user / X_movie.
rb_1 = RelationBlock(df_train.user_id.map(user_to_internal).values, X_user)
rb_2 = RelationBlock(df_train.movie_id.map(movie_to_internal).values, X_movie)

# No main design matrix is passed (X=None); all features come from X_rel.
fmr = myfm.MyFMRegressor(rank=12)
fmr.fit(
    None,
    df_train.rating.values,
    X_rel=[rb_1, rb_2],
    n_iter=200,
    n_kept_samples=100,
)
print(fmr.fms_)

# Zero-column main matrix for prediction. Its row count must match the number
# of training rows, so derive it from df_train instead of hard-coding 80000.
main = sps.csr_matrix((len(df_train), 0), dtype=np.float64)

# Root-mean-squared error of the fitted model on the training ratings.
train_pred = fmr.predict(main, [rb_1, rb_2])
train_rmse = ((train_pred - df_train.rating.values) ** 2).mean() ** 0.5
print(train_rmse)

# Import before opening so 'test.pkl' is not created/truncated if the import
# were to fail.
import pickle

with open('test.pkl', 'wb') as ofs:
    pickle.dump(fmr, ofs)