def fit(
    real,
    known,
    latent_d=1,
    ret_pmf=False,
    subtract_mean=False,
    sig_u=1e10,
    sig_v=1e10,
    sig=1,
    do_bayes=False,
    burnin=10,
    samps=200,
    stop_thresh=1e-10,
    min_learning_rate=1e-20,
):
    """Fit a PMF model to the known cells of ``real`` and return predictions.

    The values of ``real`` at the nonzero positions of ``known`` are packed
    into an ``(n, 3)`` float array of ``(row, column, value)`` rows.  That
    array is used to build a ``ProbabilisticMatrixFactorization``, which is
    configured from the keyword arguments and fitted.  When ``do_bayes`` is
    set, a ``BayesianPMF`` is additionally seeded with the fitted model's
    state and the prediction is made from its samples instead.

    Args:
        real: 2-D array indexed as ``real[i, j]``; source of the values.
        known: 2-D mask; its nonzero entries select which cells are used.
        latent_d: latent dimension handed to the model constructors.
        ret_pmf: if true, return ``(model, pred)`` rather than just ``pred``.
        subtract_mean: forwarded to ``ProbabilisticMatrixFactorization``.
        sig_u: assigned to the model's ``sigma_u_sq``.
        sig_v: assigned to the model's ``sigma_v_sq``.
        sig: assigned to the model's ``sigma_sq``.
        do_bayes: if true, predict from ``BayesianPMF`` samples.
        burnin: number of leading samples to throw away (Bayesian path).
        samps: number of samples passed to ``BayesianPMF.predict``.
        stop_thresh: assigned to the model's ``stop_thresh`` before fitting.
        min_learning_rate: assigned to the model's ``min_learning_rate``
            before fitting.

    Returns:
        ``pred``, or ``(model, pred)`` when ``ret_pmf`` is true.  ``model`` is
        the ``BayesianPMF`` when ``do_bayes`` is set, otherwise the fitted
        ``ProbabilisticMatrixFactorization``.
    """
    # Collect every known cell in one vectorized pass.  Sizing the table by
    # the number of nonzero positions (instead of summing the mask) keeps the
    # row count correct for masks that are not strictly boolean / 0-1.
    rows, cols = known.nonzero()
    ratings = np.zeros((len(rows), 3))
    ratings[:, 0] = rows
    ratings[:, 1] = cols
    ratings[:, 2] = real[rows, cols]

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Use the caller's latent_d here (it was hard-coded to 1) so that anything
    # the constructor sizes by dimension agrees with the state copied in below.
    # NOTE(review): the copied state may already override this -- confirm
    # against BayesianPMF.__setstate__.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())
    sampler = bpmf.samples()

    # Burn-in: advance the sampler by `burnin` draws and discard them.
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred
def fit(real,
        known,
        latent_d=1,
        ret_pmf=False,
        subtract_mean=False,
        sig_u=1e10,
        sig_v=1e10,
        sig=1,
        do_bayes=False,
        burnin=10,
        samps=200,
        stop_thresh=1e-10,
        min_learning_rate=1e-20):
    """Fit a PMF model on the observed entries of ``real`` and predict.

    Entries of ``real`` located at the nonzero positions of ``known`` are
    turned into ``(row, column, value)`` triples, a
    ``ProbabilisticMatrixFactorization`` is built from them, configured and
    fitted.  If ``do_bayes`` is true, a ``BayesianPMF`` takes over the fitted
    state and the prediction comes from its samples.

    Args:
        real: 2-D array of values, indexed as ``real[i, j]``.
        known: 2-D mask whose nonzero entries mark the cells to use.
        latent_d: latent dimension given to the model constructors.
        ret_pmf: return ``(model, pred)`` instead of ``pred`` when true.
        subtract_mean: passed to ``ProbabilisticMatrixFactorization``.
        sig_u: value stored in the model's ``sigma_u_sq``.
        sig_v: value stored in the model's ``sigma_v_sq``.
        sig: value stored in the model's ``sigma_sq``.
        do_bayes: switch to sample-based prediction with ``BayesianPMF``.
        burnin: how many initial samples are dropped (Bayesian path only).
        samps: how many samples are given to ``BayesianPMF.predict``.
        stop_thresh: value stored in the model's ``stop_thresh``.
        min_learning_rate: value stored in the model's ``min_learning_rate``.

    Returns:
        The prediction, or ``(model, prediction)`` if ``ret_pmf`` is true;
        ``model`` is the ``BayesianPMF`` on the Bayesian path and the fitted
        ``ProbabilisticMatrixFactorization`` otherwise.
    """
    # Vectorized extraction of the known cells.  The table is sized by the
    # count of nonzero mask positions, not by the mask's sum, so a mask with
    # values other than 0/1 (or a float mask) still yields one row per cell.
    known_rows, known_cols = known.nonzero()
    ratings = np.zeros((len(known_rows), 3))
    ratings[:, 0] = known_rows
    ratings[:, 1] = known_cols
    ratings[:, 2] = real[known_rows, known_cols]

    pmf = ProbabilisticMatrixFactorization(ratings, latent_d, subtract_mean)
    pmf.sigma_sq = sig
    pmf.sigma_u_sq = sig_u
    pmf.sigma_v_sq = sig_v
    pmf.stop_thresh = stop_thresh
    pmf.min_learning_rate = min_learning_rate
    pmf.fit()

    if not do_bayes:
        pred = pmf.predicted_matrix()
        return (pmf, pred) if ret_pmf else pred

    # Construct with the requested latent_d rather than a hard-coded 1, so
    # constructor-side setup matches the dimension of the state copied below.
    # NOTE(review): presumably harmless if __setstate__ overrides everything;
    # verify against BayesianPMF.
    bpmf = BayesianPMF(ratings, latent_d)
    bpmf.__setstate__(pmf.__getstate__())
    sampler = bpmf.samples()

    # Skip the first `burnin` samples without keeping them.
    next(islice(sampler, burnin, burnin), None)

    pred = bpmf.predict(islice(sampler, samps))
    return (bpmf, pred) if ret_pmf else pred
Ejemplo n.º 3
0
        ratings[idx, :] = i, j, data[i, j]
else:
    ratings = data['_ratings']

# Build the model from the ratings table prepared above.
bpmf = BayesianPMF(ratings, args.latent_d)

print("Doing initial MAP fit")
# With do_yield=True the fit is iterated, giving a pair of values per step
# that is echoed with a timestamp.
fit_progress = bpmf.fit_minibatches_until_validation(
    args.batch_size,
    args.validation_size,
    do_yield=True,
    stop_thresh=args.stop_thresh,
)
for train_val, valid_val in fit_progress:
    stamp = datetime.datetime.now().time()
    print("\t{} {:.5} {:.5}".format(stamp, train_val, valid_val))

print("Saving model")
model_path = args.out + '_model.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(bpmf, model_file)

print("Getting MCMC samples")
num_samps = 2000
# NaN-filled buffers: any slot the sampler does not reach stays NaN.
u_samps = np.full((num_samps, bpmf.num_users, bpmf.latent_d), np.nan)
v_samps = np.full((num_samps, bpmf.num_items, bpmf.latent_d), np.nan)
drawn = itertools.islice(bpmf.samples(), num_samps)
for step, (u_draw, v_draw) in enumerate(drawn):
    # Progress line on every tenth sample.
    if step % 10 == 0:
        print(datetime.datetime.now().time(), step)
    u_samps[step] = u_draw
    v_samps[step] = v_draw

print("Saving u samples")
u_path = args.out + '_u_samps.npy'
np.save(u_path, u_samps)

print("Saving v samples")
v_path = args.out + '_v_samps.npy'
np.save(v_path, v_samps)