def test_mean_center_predictor_algo(self):
    """Run the user-based neighborhood algorithm with the "mean_center" predictor.

    Uses the "person" similarity with topk=2, fits on ``test_rating_data``,
    then prints the predicted ratings and their RMSE. Nothing is asserted.
    """
    # NOTE(review): an item-based test with this same name is also visible in
    # this source; if both live in one class the later definition replaces the
    # earlier one -- confirm they belong to different classes.
    similarity = SimilarityConfig(name="person")
    predictor = PredictorConfig(name="mean_center")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        predictor_config=predictor,
    )
    model = UserBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.626923301909203
    print(rmse(predicted, observed))
def test_item_based_regression_model(self):
    """Run the item-based regression model on ``test_rating_data``.

    Fits with topk=3, lr=0.005, epochs=200 and wdecay=0.0001, then prints the
    result of ``statistic``, the RMSE and the model's ``__parameters__()``.
    Nothing is asserted.
    """
    settings = RegressionModelNeighborhoodBasedConfig(
        topk=3,
        lr=0.005,
        epochs=200,
        wdecay=0.0001,
    )
    model = ItemBasedRegressionModel(settings)
    model.fit(test_rating_data)
    predicted = model.predict()

    # A fresh masked view (entries equal to 0 masked) is built for each
    # metric, exactly as the two separate calls in the original did.
    summary = statistic(ma.masked_equal(test_rating_data, 0), predicted)
    print(summary)
    error = rmse(ma.masked_equal(test_rating_data, 0), predicted)
    print(error)
    print(model.__parameters__())
def test_idf_sim_algo(self):
    """Run the user-based neighborhood algorithm with the "idf_person" similarity.

    Uses topk=2, no similarity threshold and the "mean_center" predictor,
    fits on ``test_rating_data``, then prints the predicted ratings and their
    RMSE. Nothing is asserted.
    """
    similarity = SimilarityConfig(name="idf_person")
    predictor = PredictorConfig(name="mean_center")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        sim_threshold=None,
        predictor_config=predictor,
    )
    model = UserBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.9546439193233666
    print(rmse(predicted, observed))
def test_pca_person_sim_algo(self):
    """Run the item-based neighborhood algorithm with the "pca_person" similarity.

    Uses dims=2 for the similarity, topk=2, no similarity threshold and the
    "norm" predictor, fits on ``test_rating_data``, then prints the predicted
    ratings and their RMSE. Nothing is asserted.
    """
    similarity = SimilarityConfig(name="pca_person", dims=2)
    predictor = PredictorConfig(name="norm")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        sim_threshold=None,
        predictor_config=predictor,
    )
    model = ItemBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.6237294778495414
    print(rmse(predicted, observed))
def test_amplify_sim_algo(self):
    """Run the user-based neighborhood algorithm with the "amplify_person" similarity.

    Uses amplify_alpha=3, topk=2, no similarity threshold and the
    "mean_sigma" predictor, fits on ``test_rating_data``, then prints the
    predicted ratings and their RMSE. Nothing is asserted.
    """
    similarity = SimilarityConfig(name="amplify_person", amplify_alpha=3)
    predictor = PredictorConfig(name="mean_sigma")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        sim_threshold=None,
        predictor_config=predictor,
    )
    model = UserBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.5921262449508355
    print(rmse(predicted, observed))
def test_mean_sigma_predictor_algo(self):
    """Run the user-based neighborhood algorithm with the "mean_sigma" predictor.

    Uses the "discounted_person" similarity with discounted_beta=6, topk=2
    and no similarity threshold, fits on ``test_rating_data``, then prints the
    predicted ratings and their RMSE. Nothing is asserted.
    """
    similarity = SimilarityConfig(name="discounted_person", discounted_beta=6)
    predictor = PredictorConfig(name="mean_sigma")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        sim_threshold=None,
        predictor_config=predictor,
    )
    model = UserBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.6249970642035232
    print(rmse(predicted, observed))
def test_norm_predictor_algo(self):
    """Run the user-based neighborhood algorithm with the "norm" predictor.

    Uses the "person" similarity (discounted_beta=None), topk=2 and no
    similarity threshold, fits on ``test_rating_data``, then prints the
    predicted ratings and their RMSE. Nothing is asserted.
    """
    similarity = SimilarityConfig(name="person", discounted_beta=None)
    predictor = PredictorConfig(name="norm")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        sim_threshold=None,
        predictor_config=predictor,
    )
    model = UserBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 2.125021074648673
    print(rmse(predicted, observed))
def test_mean_center_predictor_algo_with_sim_threshold(self):
    """Run the user-based "mean_center" setup with a similarity threshold.

    Neighbors are configured with sim_threshold=0.0 and topk=None (instead of
    a fixed top-k); the similarity is "person" with discounted_beta=None.
    Fits on ``test_rating_data``, then prints the predicted ratings and their
    RMSE. Nothing is asserted.
    """
    similarity = SimilarityConfig(name="person", discounted_beta=None)
    predictor = PredictorConfig(name="mean_center")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=None,
        sim_threshold=0.0,
        predictor_config=predictor,
    )
    model = UserBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.6314698247230878
    print(rmse(predicted, observed))
def test_mean_center_predictor_algo(self):
    """Run the item-based neighborhood algorithm with the "mean_center" predictor.

    Uses the "cosine" similarity with topk=2 and no similarity threshold,
    fits on ``test_rating_data``, then prints the predicted ratings and their
    RMSE. Nothing is asserted.
    """
    # NOTE(review): a user-based test with this same name is also visible in
    # this source; if both live in one class the later definition replaces the
    # earlier one -- confirm they belong to different classes.
    similarity = SimilarityConfig(name="cosine")
    predictor = PredictorConfig(name="mean_center")
    settings = NeighborhoodBasedConfig(
        sim_config=similarity,
        topk=2,
        sim_threshold=None,
        predictor_config=predictor,
    )
    model = ItemBasedNeighborhoodAlgorithm(settings)
    model.fit(test_rating_data)

    predicted = model.predict(None)
    print(predicted)

    # Entries equal to 0 are masked before scoring (presumably "no rating").
    observed = ma.masked_equal(test_rating_data, 0)
    # RMSE noted by the original author for this setup: 0.6543287113746402
    print(rmse(predicted, observed))
def _str2bool(text):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because any non-empty string is truthy, so the flag could never be turned
    off explicitly. This converter accepts the usual spellings instead.

    Raises:
        ValueError: for an unrecognized token; argparse reports it as a
            normal usage error.
    """
    lowered = str(text).strip().lower()
    if lowered in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string stays False, matching what bool("") returned before.
    if lowered in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise ValueError("expected a boolean value, got {!r}".format(text))


parser.add_argument("--epochs", type=int, default=10000,
                    help="epochs about regression model")
parser.add_argument("--weight_decay", type=float, default=0.001,
                    help="weight decay or l2 regularization")
parser.add_argument("--check_gradient", type=_str2bool, default=False,
                    help="check gradient in optimization")
parser.add_argument("--model_dir", type=str,
                    help="model directory to save and restore")

if __name__ == "__main__":
    args = parser.parse_args()
    logger.info("--------------------------------------------------")
    print("neighborhood args: {}".format(args))
    ml = MovieLenDataset(args.dataset)

    if args.algo in ("user", "item"):
        # The user- and item-based variants take the same configuration and
        # differ only in the algorithm class.
        neighborhood_config = NeighborhoodBasedConfig(
            sim_config=SimilarityConfig(name=args.sim,
                                        discounted_beta=args.discounted_beta,
                                        amplify_alpha=args.amplify_alpha,
                                        dims=args.dims),
            topk=args.topk,
            sim_threshold=args.sim_threshold,
            predictor_config=PredictorConfig(name=args.predictor))
        if args.algo == "user":
            algorithm = UserBasedNeighborhoodAlgorithm(neighborhood_config)
        else:
            algorithm = ItemBasedNeighborhoodAlgorithm(neighborhood_config)
    elif args.algo in ("user_reg", "item_reg"):
        # Likewise, both regression models share one configuration.
        regression_config = RegressionModelNeighborhoodBasedConfig(
            topk=args.topk,
            lr=args.learn_rate,
            epochs=args.epochs,
            wdecay=args.weight_decay,
            check_gradient=args.check_gradient,
            model_dir=args.model_dir)
        if args.algo == "user_reg":
            algorithm = UserBasedRegressionModel(regression_config)
        else:
            algorithm = ItemBasedRegressionModel(regression_config)
    else:
        print("[USAGE] algo must be in [user,item,user_reg,item_reg]")
        sys.exit(-1)

    algorithm.fit(ml.R)
    hat_rating = algorithm.predict(None)
    # Entries equal to 0 are masked before scoring (presumably "no rating").
    rating = ma.masked_equal(ml.R, 0)
    logger.debug(
        "rating_count: {rating_count}\tpredict_count:{predict_count}"
        "\tpredict_rate:{predict_rate}".format(**metric.statistic(rating, hat_rating)))
    # NOTE(review): the RMSE is computed on column 0 only -- confirm that
    # scoring a single column is intended.
    logger.info("|{}|{}|".format(args, metric.rmse(rating[:, 0], hat_rating[:, 0])))
parser.add_argument(
    "--epochs",
    type=int,
    default=1000,
    help="epochs about regression model",
)
parser.add_argument(
    "--model_dir",
    type=str,
    help="model directory to save and restore",
)

# Script entry point: fit the selected latent-factor algorithm on the
# dataset's rating matrix and log the RMSE of its predictions.
if __name__ == "__main__":
    args = parser.parse_args()
    logger.info("--------------------------------------------------")
    print("args: {}".format(args))
    ml = MovieLenDataset(args.dataset)

    # "umf" is the only supported algorithm; anything else prints the usage
    # hint and exits with status -1.
    if args.algo != "umf":
        print("[USAGE] algo must be in [umf, ]")
        sys.exit(-1)

    config = LatentFactorConfig(
        model_dir=args.model_dir,
        embedding_size=args.embedding_size,
        epochs=args.epochs,
        learn_rate=args.learn_rate,
    )
    algorithm = UnconstrainedMatrixFactorAlgorithm(config)

    algorithm.fit(ml.R)
    hat_rating = algorithm.predict(None)
    # Entries equal to 0 are masked before scoring (presumably "no rating").
    rating = ma.masked_equal(ml.R, 0)
    # NOTE(review): only column 0 of both matrices is scored -- confirm.
    first_column_rmse = metric.rmse(rating[:, 0], hat_rating[:, 0])
    logger.info("|{}|{}|".format(args, first_column_rmse))
def test_rmse(self):
    """Check that ``metric.rmse`` returns 1 for two small masked vectors."""
    # Zeros are masked in both inputs: index 2 of the first vector and index 0
    # of the second. The remaining pairs (2, 1) and (1, 2) differ by 1 each,
    # presumably why the expected RMSE is 1.
    actual = ma.masked_equal([1, 2, 0, 1], 0)
    predicted = ma.masked_equal([0, 1, 2, 2], 0)
    score = metric.rmse(actual, predicted)
    assert math.isclose(score, 1)