예제 #1
0
 def test_mean_center_predictor_algo(self):
     """Fit a user-based neighborhood model and print its predictions and RMSE.

     The model uses the "person" similarity, the two nearest neighbors and
     the "mean_center" predictor. Nothing is asserted; results are printed.
     """
     similarity = SimilarityConfig(name="person")
     predictor = PredictorConfig(name="mean_center")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=2,
                                      predictor_config=predictor)
     model = UserBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 0.626923301909203
     print(rmse(predicted, observed))
 def test_item_based_regression_model(self):
     """Train the item-based regression model and print its diagnostics.

     Prints the output of ``statistic``, the RMSE against the masked
     fixture, and the model's ``__parameters__()``. Nothing is asserted.
     """
     config = RegressionModelNeighborhoodBasedConfig(topk=3,
                                                     lr=0.005,
                                                     epochs=200,
                                                     wdecay=0.0001)
     model = ItemBasedRegressionModel(config)
     model.fit(test_rating_data)
     predicted = model.predict()

     # Each metric receives its own freshly masked copy of the fixture
     # (entries equal to 0 are masked).
     stats = statistic(ma.masked_equal(test_rating_data, 0), predicted)
     print(stats)
     error = rmse(ma.masked_equal(test_rating_data, 0), predicted)
     print(error)
     print(model.__parameters__())
예제 #3
0
 def test_idf_sim_algo(self):
     """Fit a user-based model with the "idf_person" similarity and print RMSE.

     Uses the two nearest neighbors, no similarity threshold and the
     "mean_center" predictor. Nothing is asserted; results are printed.
     """
     similarity = SimilarityConfig(name="idf_person")
     predictor = PredictorConfig(name="mean_center")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=2,
                                      sim_threshold=None,
                                      predictor_config=predictor)
     model = UserBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 0.9546439193233666
     print(rmse(predicted, observed))
예제 #4
0
 def test_pca_person_sim_algo(self):
     """Fit an item-based model with the "pca_person" similarity and print RMSE.

     The similarity is configured with ``dims=2``; the model uses the two
     nearest neighbors, no similarity threshold and the "norm" predictor.
     Nothing is asserted; results are printed.
     """
     similarity = SimilarityConfig(name="pca_person", dims=2)
     predictor = PredictorConfig(name="norm")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=2,
                                      sim_threshold=None,
                                      predictor_config=predictor)
     model = ItemBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 0.6237294778495414
     print(rmse(predicted, observed))
예제 #5
0
 def test_amplify_sim_algo(self):
     """Fit a user-based model with the "amplify_person" similarity and print RMSE.

     The similarity is configured with ``amplify_alpha=3``; the model uses
     the two nearest neighbors, no similarity threshold and the
     "mean_sigma" predictor. Nothing is asserted; results are printed.
     """
     similarity = SimilarityConfig(name="amplify_person", amplify_alpha=3)
     predictor = PredictorConfig(name="mean_sigma")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=2,
                                      sim_threshold=None,
                                      predictor_config=predictor)
     model = UserBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 0.5921262449508355
     print(rmse(predicted, observed))
예제 #6
0
 def test_mean_sigma_predictor_algo(self):
     """Fit a user-based model with the "mean_sigma" predictor and print RMSE.

     The similarity is "discounted_person" with ``discounted_beta=6``; the
     model uses the two nearest neighbors and no similarity threshold.
     Nothing is asserted; results are printed.
     """
     similarity = SimilarityConfig(name="discounted_person",
                                   discounted_beta=6)
     predictor = PredictorConfig(name="mean_sigma")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=2,
                                      sim_threshold=None,
                                      predictor_config=predictor)
     model = UserBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 0.6249970642035232
     print(rmse(predicted, observed))
예제 #7
0
 def test_norm_predictor_algo(self):
     """Fit a user-based model with the "norm" predictor and print RMSE.

     The similarity is "person" with ``discounted_beta=None``; the model
     uses the two nearest neighbors and no similarity threshold. Nothing is
     asserted; results are printed.
     """
     similarity = SimilarityConfig(name="person", discounted_beta=None)
     predictor = PredictorConfig(name="norm")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=2,
                                      sim_threshold=None,
                                      predictor_config=predictor)
     model = UserBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 2.125021074648673
     print(rmse(predicted, observed))
예제 #8
0
 def test_mean_center_predictor_algo_with_sim_threshold(self):
     """Fit a user-based model selected by similarity threshold and print RMSE.

     ``topk`` is None and ``sim_threshold`` is 0.0; the similarity is
     "person" with ``discounted_beta=None`` and the predictor is
     "mean_center". Nothing is asserted; results are printed.
     """
     similarity = SimilarityConfig(name="person", discounted_beta=None)
     predictor = PredictorConfig(name="mean_center")
     config = NeighborhoodBasedConfig(sim_config=similarity,
                                      topk=None,
                                      sim_threshold=0.0,
                                      predictor_config=predictor)
     model = UserBasedNeighborhoodAlgorithm(config)
     model.fit(test_rating_data)

     predicted = model.predict(None)
     print(predicted)

     # Entries equal to 0 in the fixture are masked before scoring.
     observed = ma.masked_equal(test_rating_data, 0)
     # Value recorded by the original author for this run: 0.6314698247230878
     print(rmse(predicted, observed))
예제 #9
0
    def test_mean_center_predictor_algo(self):
        """Fit an item-based model with the "cosine" similarity and print RMSE.

        Uses the two nearest neighbors, no similarity threshold and the
        "mean_center" predictor. Nothing is asserted; results are printed.
        """
        similarity = SimilarityConfig(name="cosine")
        predictor = PredictorConfig(name="mean_center")
        config = NeighborhoodBasedConfig(sim_config=similarity,
                                         topk=2,
                                         sim_threshold=None,
                                         predictor_config=predictor)
        model = ItemBasedNeighborhoodAlgorithm(config)
        model.fit(test_rating_data)

        predicted = model.predict(None)
        print(predicted)

        # Entries equal to 0 in the fixture are masked before scoring.
        observed = ma.masked_equal(test_rating_data, 0)
        # Value recorded by the original author for this run: 0.6543287113746402
        print(rmse(predicted, observed))
예제 #10
0
def _str2bool(text):
    """Convert a command-line token into a bool.

    ``type=bool`` is not usable with argparse: it calls ``bool()`` on the raw
    string, so any non-empty value, including "False", becomes True. This
    converter interprets the usual spellings explicitly instead.

    Args:
        text: The raw argument value (a bool is passed through unchanged).

    Returns:
        True or False according to the spelling of ``text``. The empty string
        maps to False, as it did under ``bool("")``.

    Raises:
        ValueError: If ``text`` is not a recognized boolean spelling; argparse
            reports this to the user as an invalid argument value.
    """
    if isinstance(text, bool):
        return text
    normalized = text.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1", "on"):
        return True
    if normalized in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise ValueError("expected a boolean value, got {!r}".format(text))


# Number of training epochs for the regression model.
parser.add_argument("--epochs", type=int, default=10000, help="epochs about regression model")
# Strength of the weight decay / L2 regularization term.
parser.add_argument("--weight_decay", type=float, default=0.001, help="weight decay or l2 regularization")
# Parsed with _str2bool so that "--check_gradient False" really yields False.
parser.add_argument("--check_gradient", type=_str2bool, default=False, help="check gradient in optimization")
# Directory the model is saved to and restored from.
parser.add_argument("--model_dir", type=str, help="model directory to save and restore")


if __name__ == "__main__":
    # Script entry point: build the algorithm selected by --algo, fit it on
    # the dataset's rating matrix and log the resulting metrics.
    args = parser.parse_args()
    logger.info("--------------------------------------------------")
    print("neighborhood args: {}".format(args))
    ml = MovieLenDataset(args.dataset)

    if args.algo in ("user", "item"):
        # Both neighborhood variants share one configuration; only the
        # algorithm class differs.
        similarity = SimilarityConfig(name=args.sim,
                                      discounted_beta=args.discounted_beta,
                                      amplify_alpha=args.amplify_alpha,
                                      dims=args.dims)
        config = NeighborhoodBasedConfig(
            sim_config=similarity,
            topk=args.topk,
            sim_threshold=args.sim_threshold,
            predictor_config=PredictorConfig(name=args.predictor))
        if args.algo == "user":
            algorithm = UserBasedNeighborhoodAlgorithm(config)
        else:
            algorithm = ItemBasedNeighborhoodAlgorithm(config)
    elif args.algo in ("user_reg", "item_reg"):
        # Both regression variants share one configuration as well.
        config = RegressionModelNeighborhoodBasedConfig(
            topk=args.topk,
            lr=args.learn_rate,
            epochs=args.epochs,
            wdecay=args.weight_decay,
            check_gradient=args.check_gradient,
            model_dir=args.model_dir)
        if args.algo == "user_reg":
            algorithm = UserBasedRegressionModel(config)
        else:
            algorithm = ItemBasedRegressionModel(config)
    else:
        print("[USAGE] algo must be in [user,item,user_reg,item_reg]")
        sys.exit(-1)

    algorithm.fit(ml.R)
    hat_rating = algorithm.predict(None)
    # Entries equal to 0 in the rating matrix are masked before scoring.
    rating = ma.masked_equal(ml.R, 0)

    stats = metric.statistic(rating, hat_rating)
    logger.debug("rating_count: {rating_count}\tpredict_count:{predict_count}\tpredict_rate:{predict_rate}".format(**stats))
    # NOTE(review): only column 0 of both matrices is passed to metric.rmse;
    # confirm that scoring a single column is intended.
    score = metric.rmse(rating[:, 0], hat_rating[:, 0])
    logger.info("|{}|{}|".format(args, score))
예제 #11
0
# Number of training epochs (defaults to 1000).
parser.add_argument(
    "--epochs", type=int, default=1000, help="epochs about regression model"
)
# Directory the model is saved to and restored from.
parser.add_argument(
    "--model_dir", type=str, help="model directory to save and restore"
)

if __name__ == "__main__":
    # Script entry point: fit the latent-factor algorithm selected by --algo
    # on the dataset's rating matrix and log its RMSE.
    args = parser.parse_args()
    logger.info("--------------------------------------------------")
    print("args: {}".format(args))
    ml = MovieLenDataset(args.dataset)

    # "umf" is the only supported algorithm; anything else is a usage error.
    if args.algo != "umf":
        print("[USAGE] algo must be in [umf, ]")
        sys.exit(-1)

    config = LatentFactorConfig(model_dir=args.model_dir,
                                embedding_size=args.embedding_size,
                                epochs=args.epochs,
                                learn_rate=args.learn_rate)
    algorithm = UnconstrainedMatrixFactorAlgorithm(config)

    algorithm.fit(ml.R)
    hat_rating = algorithm.predict(None)
    # Entries equal to 0 in the rating matrix are masked before scoring.
    rating = ma.masked_equal(ml.R, 0)

    # NOTE(review): only column 0 of both matrices is passed to metric.rmse;
    # confirm that scoring a single column is intended.
    score = metric.rmse(rating[:, 0], hat_rating[:, 0])
    logger.info("|{}|{}|".format(args, score))
예제 #12
0
 def test_rmse(self):
     """Check that ``metric.rmse`` returns 1 for two small masked vectors.

     Both inputs have their entries equal to 0 masked. Presumably only the
     positions unmasked in both vectors contribute (2 vs 1 and 1 vs 2),
     which would give an RMSE of 1 -- confirm against ``metric.rmse``.
     """
     actual = ma.masked_equal([1, 2, 0, 1], 0)
     predicted = ma.masked_equal([0, 1, 2, 2], 0)
     score = metric.rmse(actual, predicted)
     assert math.isclose(score, 1)