def fit(
    real,
    known,
    latent_d=1,
    ret_pmf=False,
    subtract_mean=False,
    sig_u=1e10,
    sig_v=1e10,
    sig=1,
    do_bayes=False,
    burnin=10,
    samps=200,
    stop_thresh=1e-10,
    min_learning_rate=1e-20,
):
    """Fit a PMF model to the observed entries of ``real`` and predict.

    The entries of ``real`` at the nonzero positions of ``known`` are packed
    into ``(row, column, value)`` triples and used to fit a
    ``ProbabilisticMatrixFactorization`` model.  When ``do_bayes`` is true, a
    ``BayesianPMF`` is seeded with the fitted model's state and the
    prediction is computed from its samples instead.

    Args:
        real: 2-D array of values, indexed as ``real[i, j]``.
        known: 2-D mask indexed like ``real``; nonzero entries mark the
            observed values.
        latent_d: Latent dimensionality passed to the model constructors.
        ret_pmf: If true, return ``(model, prediction)`` rather than only
            the prediction.
        subtract_mean: Forwarded to ``ProbabilisticMatrixFactorization``.
        sig_u: Assigned to the model's ``sigma_u_sq`` attribute.
        sig_v: Assigned to the model's ``sigma_v_sq`` attribute.
        sig: Assigned to the model's ``sigma_sq`` attribute.
        do_bayes: If true, predict from ``BayesianPMF`` samples instead of
            the fitted model's ``predicted_matrix()``.
        burnin: Number of initial samples drawn and discarded.
        samps: Number of samples handed to ``BayesianPMF.predict``.
        stop_thresh: Assigned to the model's ``stop_thresh`` attribute.
        min_learning_rate: Assigned to the model's ``min_learning_rate``
            attribute.

    Returns:
        The prediction, or ``(model, prediction)`` when ``ret_pmf`` is true.
        ``model`` is the ``BayesianPMF`` instance when ``do_bayes`` is true
        and the ``ProbabilisticMatrixFactorization`` instance otherwise.
    """
    # One (row, col, value) triple per observed entry.  The row count comes
    # from the nonzero positions themselves, so it stays correct even when
    # ``known`` is not a strict boolean/0-1 mask.
    rows, cols = known.nonzero()
    ratings = np.empty((len(rows), 3))
    ratings[:, 0] = rows
    ratings[:, 1] = cols
    ratings[:, 2] = real[rows, cols]

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Build the Bayesian model with the same latent dimensionality as the
    # fitted model whose state it is about to receive.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())
    sampler = bpmf.samples()

    # Burn-in: an islice whose start equals its stop yields nothing but
    # still advances ``sampler`` past the first ``burnin`` samples.
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred
def fit(
    real,
    known,
    latent_d=1,
    ret_pmf=False,
    subtract_mean=False,
    sig_u=1e10,
    sig_v=1e10,
    sig=1,
    do_bayes=False,
    burnin=10,
    samps=200,
    stop_thresh=1e-10,
    min_learning_rate=1e-20,
):
    """Fit a PMF model to the observed entries of ``real`` and predict.

    The entries of ``real`` at the nonzero positions of ``known`` are packed
    into ``(row, column, value)`` triples and used to fit a
    ``ProbabilisticMatrixFactorization`` model.  When ``do_bayes`` is true, a
    ``BayesianPMF`` is seeded with the fitted model's state and the
    prediction is computed from its samples instead.

    Args:
        real: 2-D array of values, indexed as ``real[i, j]``.
        known: 2-D mask indexed like ``real``; nonzero entries mark the
            observed values.
        latent_d: Latent dimensionality passed to the model constructors.
        ret_pmf: If true, return ``(model, prediction)`` rather than only
            the prediction.
        subtract_mean: Forwarded to ``ProbabilisticMatrixFactorization``.
        sig_u: Assigned to the model's ``sigma_u_sq`` attribute.
        sig_v: Assigned to the model's ``sigma_v_sq`` attribute.
        sig: Assigned to the model's ``sigma_sq`` attribute.
        do_bayes: If true, predict from ``BayesianPMF`` samples instead of
            the fitted model's ``predicted_matrix()``.
        burnin: Number of initial samples drawn and discarded.
        samps: Number of samples handed to ``BayesianPMF.predict``.
        stop_thresh: Assigned to the model's ``stop_thresh`` attribute.
        min_learning_rate: Assigned to the model's ``min_learning_rate``
            attribute.

    Returns:
        The prediction, or ``(model, prediction)`` when ``ret_pmf`` is true.
        ``model`` is the ``BayesianPMF`` instance when ``do_bayes`` is true
        and the ``ProbabilisticMatrixFactorization`` instance otherwise.
    """
    # One (row, col, value) triple per observed entry.  The row count comes
    # from the nonzero positions themselves, so it stays correct even when
    # ``known`` is not a strict boolean/0-1 mask.
    rows, cols = known.nonzero()
    ratings = np.empty((len(rows), 3))
    ratings[:, 0] = rows
    ratings[:, 1] = cols
    ratings[:, 2] = real[rows, cols]

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Build the Bayesian model with the same latent dimensionality as the
    # fitted model whose state it is about to receive.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())
    sampler = bpmf.samples()

    # Burn-in: an islice whose start equals its stop yields nothing but
    # still advances ``sampler`` past the first ``burnin`` samples.
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred
예제 #3
0

# Load the ratings, run the initial MAP fit with validation-based stopping,
# save the fitted model, and allocate storage for the MCMC samples.
print("Loading data")
opener = gzip.open if args.data_file.endswith('.gz') else open
with opener(args.data_file, 'rb') as f:
    data = np.load(f)

    # Pull the ratings out while the file is still open: np.load may return
    # an archive object that reads its members lazily from ``f``, in which
    # case indexing it after the file is closed would fail.
    if isinstance(data, np.ndarray):
        # A plain array is treated as a ratings matrix whose positive
        # entries are the observed ones; flatten it to (row, col, value).
        known = data > 0
        rows, cols = known.nonzero()
        ratings = np.empty((len(rows), 3))
        ratings[:, 0] = rows
        ratings[:, 1] = cols
        ratings[:, 2] = data[rows, cols]
    else:
        ratings = data['_ratings']

bpmf = BayesianPMF(ratings, args.latent_d)

print("Doing initial MAP fit")
minibatch_fit = bpmf.fit_minibatches_until_validation(
    args.batch_size,
    args.validation_size,
    do_yield=True,
    stop_thresh=args.stop_thresh,
)
# Each yielded pair is printed with a timestamp as a progress line.
for train, valid in minibatch_fit:
    print("\t{} {:.5} {:.5}".format(datetime.datetime.now().time(), train, valid))

print("Saving model")
with open(args.out + '_model.pkl', 'wb') as f:
    pickle.dump(bpmf, f)

print("Getting MCMC samples")
num_samps = 2000
# NaN-filled buffers so that any slot not yet written is easy to spot.
u_samps = np.full((num_samps, bpmf.num_users, bpmf.latent_d), np.nan)
v_samps = np.full((num_samps, bpmf.num_items, bpmf.latent_d), np.nan)
for idx, (u, v) in enumerate(itertools.islice(bpmf.samples(), num_samps)):