Ejemplo n.º 1
0
    def initialize_bpmf(self, args, data, query_set):
        """Build a BPMF model from the run arguments, optionally seeded at a PMF fit.

        Args:
            args: Run configuration. The attributes read here are
                ``latent_d``, ``subtract_mean``, ``discrete``,
                ``num_integration_pts``, ``model_filename``, ``hyperparams``
                (a mapping of attribute name to value) and
                ``initialize_at_pmf_map``.
            data: Object providing ``ratings`` and ``rating_vals``.
            query_set: Forwarded to ``BPMF`` as its ``knowable`` argument.

        Returns:
            The configured ``BPMF`` instance.

        Raises:
            AttributeError: If a key of ``args.hyperparams`` does not name an
                existing attribute of the model.
        """
        bpmf = BPMF(data.ratings,
                    args.latent_d,
                    subtract_mean=args.subtract_mean,
                    rating_values=data.rating_vals,
                    discrete_expectations=args.discrete,
                    num_integration_pts=args.num_integration_pts,
                    knowable=query_set,
                    model_filename=args.model_filename)

        # Only override attributes the model already has. This is an explicit
        # check rather than an `assert`, because asserts are removed when
        # Python runs with -O and a typo would then be set silently.
        for name, value in args.hyperparams.items():
            if not hasattr(bpmf, name):
                raise AttributeError(
                    "BPMF has no hyperparameter named {!r}".format(name))
            setattr(bpmf, name, value)

        if args.initialize_at_pmf_map:
            import sys

            # Make the sibling python-pmf directory importable, without
            # adding a duplicate sys.path entry on every call.
            pmf_dir = os.path.abspath(
                os.path.join(os.path.dirname(__file__), '../python-pmf'))
            if pmf_dir not in sys.path:
                sys.path.append(pmf_dir)

            # Prefer the cython build; fall back to the pure-python module.
            try:
                from pmf_cy import ProbabilisticMatrixFactorization
            except ImportError:
                warnings.warn("cython PMF not available; "
                              "using pure-python version")
                from pmf import ProbabilisticMatrixFactorization

            print("getting PMF MAP fit with default hyperparams")
            p = ProbabilisticMatrixFactorization(
                data.ratings,
                latent_d=args.latent_d,
                subtract_mean=args.subtract_mean)
            p.fit()  # TODO: support also doing SGD

            print("okay; BPMF will start from here now")
            # set the mode to the PMF mode
            # NOTE(review): the return value is discarded; presumably this
            # call populates `bpmf.sampled_mode` as a side effect of
            # update_mode=True -- confirm against BPMF.samples.
            bpmf.samples(num_samps=1, warmup=0, update_mode=True)
            bpmf.sampled_mode['U'] = p.users
            bpmf.sampled_mode['V'] = p.items
            bpmf.sampled_mode['predictions'] = p.predicted_matrix()
            # NOTE(review): -inf presumably lets any later sample with a
            # finite log-probability replace this mode -- confirm.
            bpmf.sampled_mode_lp = -np.inf

        return bpmf
def fit(
    real,
    known,
    latent_d=1,
    ret_pmf=False,
    subtract_mean=False,
    sig_u=1e10,
    sig_v=1e10,
    sig=1,
    do_bayes=False,
    burnin=10,
    samps=200,
    stop_thresh=1e-10,
    min_learning_rate=1e-20,
):
    """Fit PMF to the entries of ``real`` selected by ``known`` and predict.

    Args:
        real: 2-D matrix of values, indexed as ``real[i, j]``.
        known: 2-D mask; every nonzero entry marks an observed cell.
        latent_d: Latent dimension passed to the model constructors.
        ret_pmf: If true, return ``(model, prediction)`` instead of only the
            prediction.
        subtract_mean: Forwarded to ``ProbabilisticMatrixFactorization``.
        sig_u: Assigned to the model's ``sigma_u_sq``.
        sig_v: Assigned to the model's ``sigma_v_sq``.
        sig: Assigned to the model's ``sigma_sq``.
        do_bayes: If true, continue from the PMF fit with ``BayesianPMF``
            sampling and predict from those samples.
        burnin: Number of initial samples to discard when ``do_bayes``.
        samps: Number of samples handed to ``predict`` when ``do_bayes``.
        stop_thresh: Assigned to the model's ``stop_thresh``.
        min_learning_rate: Assigned to the model's ``min_learning_rate``.

    Returns:
        The prediction, or ``(model, prediction)`` when ``ret_pmf`` is true.
        The model is the ``BayesianPMF`` instance when ``do_bayes`` is true,
        otherwise the ``ProbabilisticMatrixFactorization`` instance.
    """
    # One (row, col, value) triple per nonzero entry of `known`. The buffer
    # is sized from the index count rather than `known.sum()`, which only
    # equals that count when the mask holds nothing but 0/1 values.
    rows, cols = known.nonzero()
    ratings = np.zeros((len(rows), 3))
    ratings[:, 0] = rows
    ratings[:, 1] = cols
    ratings[:, 2] = np.asarray(real[rows, cols]).ravel()

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Build the Bayesian model with the requested latent dimension (not a
    # hard-coded 1) so any dimension-dependent setup in its constructor
    # agrees with the PMF state copied in on the next line.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())
    sampler = bpmf.samples()

    # do burn-in: advance the sampler by `burnin` items without keeping them
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred
    def initialize_bpmf(self, args, data, query_set):
        """Build a BPMF model from the run arguments, optionally seeded at a PMF fit.

        Args:
            args: Run configuration. The attributes read here are
                ``latent_d``, ``subtract_mean``, ``discrete``,
                ``num_integration_pts``, ``model_filename``, ``hyperparams``
                (a mapping of attribute name to value) and
                ``initialize_at_pmf_map``.
            data: Object providing ``ratings`` and ``rating_vals``.
            query_set: Forwarded to ``BPMF`` as its ``knowable`` argument.

        Returns:
            The configured ``BPMF`` instance.

        Raises:
            AttributeError: If a key of ``args.hyperparams`` does not name an
                existing attribute of the model.
        """
        bpmf = BPMF(data.ratings, args.latent_d,
                    subtract_mean=args.subtract_mean,
                    rating_values=data.rating_vals,
                    discrete_expectations=args.discrete,
                    num_integration_pts=args.num_integration_pts,
                    knowable=query_set,
                    model_filename=args.model_filename)

        # Only override attributes the model already has. This is an explicit
        # check rather than an `assert`, because asserts are removed when
        # Python runs with -O and a typo would then be set silently.
        for name, value in args.hyperparams.items():
            if not hasattr(bpmf, name):
                raise AttributeError(
                    "BPMF has no hyperparameter named {!r}".format(name))
            setattr(bpmf, name, value)

        if args.initialize_at_pmf_map:
            import sys

            # Make the sibling python-pmf directory importable, without
            # adding a duplicate sys.path entry on every call.
            pmf_dir = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                   '../python-pmf'))
            if pmf_dir not in sys.path:
                sys.path.append(pmf_dir)

            # Prefer the cython build; fall back to the pure-python module.
            try:
                from pmf_cy import ProbabilisticMatrixFactorization
            except ImportError:
                warnings.warn("cython PMF not available; "
                              "using pure-python version")
                from pmf import ProbabilisticMatrixFactorization

            print("getting PMF MAP fit with default hyperparams")
            p = ProbabilisticMatrixFactorization(
                data.ratings, latent_d=args.latent_d,
                subtract_mean=args.subtract_mean)
            p.fit()  # TODO: support also doing SGD

            print("okay; BPMF will start from here now")
            # set the mode to the PMF mode
            # NOTE(review): the return value is discarded; presumably this
            # call populates `bpmf.sampled_mode` as a side effect of
            # update_mode=True -- confirm against BPMF.samples.
            bpmf.samples(num_samps=1, warmup=0, update_mode=True)
            bpmf.sampled_mode['U'] = p.users
            bpmf.sampled_mode['V'] = p.items
            bpmf.sampled_mode['predictions'] = p.predicted_matrix()
            # NOTE(review): -inf presumably lets any later sample with a
            # finite log-probability replace this mode -- confirm.
            bpmf.sampled_mode_lp = -np.inf

        return bpmf
def fit(real,
        known,
        latent_d=1,
        ret_pmf=False,
        subtract_mean=False,
        sig_u=1e10,
        sig_v=1e10,
        sig=1,
        do_bayes=False,
        burnin=10,
        samps=200,
        stop_thresh=1e-10,
        min_learning_rate=1e-20):
    """Fit PMF to the entries of ``real`` selected by ``known`` and predict.

    Args:
        real: 2-D matrix of values, indexed as ``real[i, j]``.
        known: 2-D mask; every nonzero entry marks an observed cell.
        latent_d: Latent dimension passed to the model constructors.
        ret_pmf: If true, return ``(model, prediction)`` instead of only the
            prediction.
        subtract_mean: Forwarded to ``ProbabilisticMatrixFactorization``.
        sig_u: Assigned to the model's ``sigma_u_sq``.
        sig_v: Assigned to the model's ``sigma_v_sq``.
        sig: Assigned to the model's ``sigma_sq``.
        do_bayes: If true, continue from the PMF fit with ``BayesianPMF``
            sampling and predict from those samples.
        burnin: Number of initial samples to discard when ``do_bayes``.
        samps: Number of samples handed to ``predict`` when ``do_bayes``.
        stop_thresh: Assigned to the model's ``stop_thresh``.
        min_learning_rate: Assigned to the model's ``min_learning_rate``.

    Returns:
        The prediction, or ``(model, prediction)`` when ``ret_pmf`` is true.
        The model is the ``BayesianPMF`` instance when ``do_bayes`` is true,
        otherwise the ``ProbabilisticMatrixFactorization`` instance.
    """
    # One (row, col, value) triple per nonzero entry of `known`. The buffer
    # is sized from the index count rather than `known.sum()`, which only
    # equals that count when the mask holds nothing but 0/1 values.
    rows, cols = known.nonzero()
    ratings = np.zeros((len(rows), 3))
    ratings[:, 0] = rows
    ratings[:, 1] = cols
    ratings[:, 2] = np.asarray(real[rows, cols]).ravel()

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Build the Bayesian model with the requested latent dimension (not a
    # hard-coded 1) so any dimension-dependent setup in its constructor
    # agrees with the PMF state copied in on the next line.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())
    sampler = bpmf.samples()

    # do burn-in: advance the sampler by `burnin` items without keeping them
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred