def ProbabilisticSequentialMatrixFactorizer(
    Y, C, X, d, n, r, M, Mmiss, lam, V, Q, R, P, sig, Iter, YorgInt, Einit
):
    """Run ``Iter`` sequential passes of the PSMF recursion over the columns of Y.

    For every column ``t`` the previous latent column (wrapping around to the
    last column when ``t == 0``) is used to predict ``Y[:, t]``; the latent
    column, its covariance ``P``, the dictionary ``C`` and its covariance
    factor ``V`` are then updated from the masked innovation.

    Parameters
    ----------
    Y : (d, n) array of observations (indexed column by column).
    C : (d, r) dictionary; rebound locally, the caller's array is not modified.
    X : (r, n) latent coefficients; columns are overwritten **in place**.
    d, n : dimensions used to allocate the (d, n) reconstruction buffers.
    r, lam : accepted for signature compatibility; not used in this function.
    M : (d, n) mask; column ``t`` is placed on the diagonal of the per-step
        selection matrix.
    Mmiss : mask forwarded to ``RMSEM`` and ``compute_number_inside_bars``.
    V, Q, R, P : covariance-type matrices of the recursion. ``R`` is assumed
        diagonal (only its diagonal is read).
    sig : multiplier of the predictive standard deviation for the bands.
    Iter : number of passes over the data.
    YorgInt : reference data forwarded to the error helpers.
    Einit : value stored in slot 0 of both error traces.

    Returns
    -------
    (Epred, Efull, RunTime, InsideBars) where the first three are
    ``(1, Iter + 1)`` arrays and ``InsideBars`` is whatever
    ``compute_number_inside_bars`` returns.
    """
    trace_shape = (1, Iter + 1)
    Epred = np.zeros(trace_shape)
    Efull = np.zeros(trace_shape)
    Epred[:, 0] = Einit
    Efull[:, 0] = Einit
    RunTime = np.zeros(trace_shape)

    Yrec = np.zeros((d, n))
    YrecL = np.zeros((d, n))
    YrecH = np.zeros((d, n))
    Id = np.eye(d)
    # R is never rebound in this function, so its diagonal can be read once.
    r_diag = np.diag(R)

    RunTimeStart = time.time()
    for sweep in range(Iter):
        for t in range(n):
            mask_t = M[:, t]
            Mk = np.diag(mask_t)
            CM = Mk @ C
            # Previous latent column; (t - 1) % n wraps t == 0 to column n - 1.
            # Fancy indexing returns a copy, so later writes to X leave it intact.
            Xp = X[:, [(t - 1) % n]]
            PP = P + Q

            y_pred = C @ Xp
            Yrec[:, [t]] = y_pred

            # Assumes R is diagonal. Otherwise: MRM = Mk @ R @ Mk.T
            MRM = np.diag(mask_t * r_diag)
            xvx = Xp.T @ V @ Xp  # 1x1 array
            Rbar = MRM + xvx * Id
            CPinv = compute_Sinv(CM, PP, Rbar)

            innovation = Y[:, [t]] - CM @ Xp
            gain = PP @ CM.T @ CPinv
            X[:, [t]] = Xp + gain @ innovation
            P = PP - gain @ CM @ PP

            eta_k = np.trace(CM @ PP @ CM.T + MRM) / d
            Nt = xvx + eta_k
            C = C + (innovation @ Xp.T @ V) / Nt
            V = V - (V @ Xp @ Xp.T @ V) / Nt

            half_width = sig * np.sqrt(Nt)
            YrecL[:, [t]] = y_pred - half_width
            YrecH[:, [t]] = y_pred + half_width

        Yrec2 = C @ X
        Epred[:, sweep + 1] = RMSEM(Yrec, YorgInt, Mmiss)
        Efull[:, sweep + 1] = RMSEM(Yrec2, YorgInt, Mmiss)
        RunTime[:, sweep + 1] = time.time() - RunTimeStart

    InsideBars = compute_number_inside_bars(Mmiss, d, n, YorgInt, YrecL, YrecH)
    return Epred, Efull, RunTime, InsideBars
def stochasticGradientStateSpaceMF(
    Y, C, X, d, n, r, M, Mmiss, lam, Q, R, P, sig, Iter, YorgInt, Einit
):
    """State-space matrix factorization with a stochastic-gradient dictionary step.

    Each pass walks over the columns of ``Y``: the latent column and its
    covariance ``P`` are updated with a gain built from ``compute_Sinv``, and
    the dictionary ``C`` takes a gradient step whose size decays with the pass
    index as ``1e-6 / (pass + 1) ** 0.7`` and is scaled by ``1 / eta_k``.

    Parameters
    ----------
    Y : (d, n) observations.
    C : (d, r) dictionary; rebound locally.
    X : (r, n) latent coefficients; columns are overwritten **in place**.
    d, n : dimensions used to allocate the reconstruction buffers.
    r, lam : accepted for signature compatibility; not used here.
    M : (d, n) mask whose column ``t`` forms the diagonal selection matrix.
    Mmiss : mask forwarded to ``RMSEM`` / ``compute_number_inside_bars``.
    Q, R, P : covariance-type matrices; only the diagonal of ``R`` enters
        ``eta_k``, while the full ``R`` is handed to ``compute_Sinv``.
    sig : multiplier of ``sqrt(eta_k)`` for the lower/upper bands.
    Iter : number of passes.
    YorgInt, Einit : reference data and initial error value.

    Returns
    -------
    (Epred, Efull, RunTime, InsideBars); the first three are
    ``(1, Iter + 1)`` arrays.
    """
    trace_shape = (1, Iter + 1)
    Epred = np.zeros(trace_shape)
    Efull = np.zeros(trace_shape)
    Epred[:, 0] = Einit
    Efull[:, 0] = Einit
    RunTime = np.zeros(trace_shape)

    Yrec = np.zeros((d, n))
    YrecL = np.zeros((d, n))
    YrecH = np.zeros((d, n))
    # R is never rebound in this function, so its diagonal can be read once.
    r_diag = np.diag(R)

    RunTimeStart = time.time()
    for sweep in range(Iter):
        # Decaying step size for the dictionary update.
        gam = 1e-6 / ((sweep + 1) ** 0.7)
        for t in range(n):
            mask_t = M[:, t]
            MC = np.diag(mask_t)
            CM = MC @ C
            # Previous latent column; wraps to the last column when t == 0.
            Xp = X[:, [(t - 1) % n]]
            PP = P + Q

            y_pred = C @ Xp
            Yrec[:, [t]] = y_pred

            CPinv = compute_Sinv(CM, PP, R)
            innovation = Y[:, [t]] - CM @ Xp
            gain = PP @ CM.T @ CPinv
            X[:, [t]] = Xp + gain @ innovation
            P = PP - gain @ CM @ PP

            MRM = np.diag(mask_t * r_diag)
            eta_k = np.trace(CM @ PP @ CM.T + MRM) / d
            C = C + gam * (1 / eta_k) * (MC.T @ innovation @ Xp.T)

            half_width = sig * np.sqrt(eta_k)
            YrecL[:, [t]] = y_pred - half_width
            YrecH[:, [t]] = y_pred + half_width

        Yrec2 = C @ X
        Epred[:, sweep + 1] = RMSEM(Yrec, YorgInt, Mmiss)
        Efull[:, sweep + 1] = RMSEM(Yrec2, YorgInt, Mmiss)
        RunTime[:, sweep + 1] = time.time() - RunTimeStart

    InsideBars = compute_number_inside_bars(Mmiss, d, n, YorgInt, YrecL, YrecH)
    return Epred, Efull, RunTime, InsideBars
def temporalRegularizedMF(
    Y, C, X, d, n, r, M, Mmiss, lam, R, Iter, YorgInt, Einit
):
    """Matrix factorization with a ridge-style pull towards the previous column.

    For each column ``t`` the latent vector is obtained in closed form from
    ``(CM.T @ CM + nu * I)^-1 (nu * Xp + CM.T @ y_t)`` with ``nu = 2``, where
    ``Xp`` is the previous latent column (wrapping to the last one at
    ``t == 0``). The dictionary then takes a gradient step of size
    ``1e-6 / (pass + 1) ** 0.7``.

    Parameters
    ----------
    Y : (d, n) observations.
    C : (d, r) dictionary; rebound locally.
    X : (r, n) latent coefficients; columns are overwritten **in place**.
    d, n, r : dimensions (``r`` sizes the identity used in the solve).
    M : (d, n) mask whose column ``t`` forms the diagonal selection matrix.
    Mmiss : mask forwarded to ``RMSEM``.
    lam, R : accepted for signature compatibility; not used here.
    Iter : number of passes.
    YorgInt, Einit : reference data and initial error value.

    Returns
    -------
    (Epred, Efull, RunTime), each a ``(1, Iter + 1)`` array.
    """
    trace_shape = (1, Iter + 1)
    Epred = np.zeros(trace_shape)
    Efull = np.zeros(trace_shape)
    Epred[:, 0] = Einit
    Efull[:, 0] = Einit
    RunTime = np.zeros(trace_shape)

    Yrec = np.zeros((d, n))
    nu = 2
    ridge = nu * np.identity(r)

    RunTimeStart = time.time()
    for sweep in range(Iter):
        # Decaying step size for the dictionary update.
        gam = 1e-6 / ((sweep + 1) ** 0.7)
        for t in range(n):
            MC = np.diag(M[:, t])
            CM = MC @ C
            # Previous latent column; wraps to the last column when t == 0.
            Xp = X[:, [(t - 1) % n]]
            y_t = Y[:, [t]]

            Yrec[:, [t]] = C @ Xp

            CPinv = np.linalg.inv(CM.T @ CM + ridge)
            X[:, [t]] = CPinv @ (nu * Xp + CM.T @ y_t)

            C = C + gam * (MC.T @ (y_t - CM @ Xp) @ Xp.T)

        Yrec2 = C @ X
        Epred[:, sweep + 1] = RMSEM(Yrec, YorgInt, Mmiss)
        Efull[:, sweep + 1] = RMSEM(Yrec2, YorgInt, Mmiss)
        RunTime[:, sweep + 1] = time.time() - RunTimeStart

    return Epred, Efull, RunTime
def main():
    """Run repeated PSMF experiments on a CSV data set and dump the results.

    The input matrix is loaded from ``args.input``; for each of
    ``args.repeats`` repetitions a fresh missingness pattern and random
    initial factors are drawn, ``ProbabilisticSequentialMatrixFactorizer`` is
    run, and the final-iteration errors, runtime and coverage count are
    collected. Everything is handed to ``prepare_output`` / ``dump_output``.
    """
    args = parse_args()

    # Set the seed if given, otherwise draw one and print it out.
    # An explicit ``is not None`` check is used because ``args.seed or ...``
    # would silently discard a user-supplied seed of 0.
    # NOTE(review): assumes parse_args leaves ``seed`` as None when the
    # option is not supplied -- confirm against the argument parser.
    if args.seed is not None:
        seed = args.seed
    else:
        seed = np.random.randint(10000)
    print("Using seed: %r" % seed)
    np.random.seed(seed)

    # Load the data
    Yorig = np.genfromtxt(args.input, delimiter=",")

    # Create a copy with missings set to zero
    YorigInt = np.copy(Yorig)
    YorigInt[np.isnan(YorigInt)] = 0

    # With --fancy a progress-bar range is used and per-step logging is muted.
    _range = trange if args.fancy else range
    log = print if not args.fancy else lambda *a, **kw: None

    # Initialize dimensions, hyperparameters, and noise covariances
    d, n = Yorig.shape
    r = 10
    sig = 2
    Iter = 2
    rho = 10
    v = 2
    q = 0.1
    p = 1.0

    V = v * np.eye(r)
    Q = q * np.eye(r)
    R = rho * np.eye(d)
    P = p * np.eye(r)

    # Initialize arrays to keep track of quantities of interest
    errors_predict = []
    errors_full = []
    runtimes = []
    inside_bars = []
    Y_hashes = []
    C_hashes = []
    X_hashes = []

    for i in _range(args.repeats):
        # Create the missing mask (missMask) and its inverse (M)
        Ymiss = np.copy(Yorig)
        missRatio, missMask = prepare_missing(Ymiss, args.percentage / 100)
        M = np.array(np.invert(np.isnan(Ymiss)), dtype=int)

        # In the data we work with, set missing to 0
        Y = np.copy(Ymiss)
        Y[np.isnan(Y)] = 0

        C = np.random.rand(d, r)
        X = np.random.rand(r, n)

        # store hash of matrices; used to ensure they're the same between
        # scripts
        Y_hashes.append(matrix_hash(Y))
        C_hashes.append(matrix_hash(C))
        X_hashes.append(matrix_hash(X))

        YrecInit = C @ X
        Einit = RMSEM(YrecInit, YorigInt, missMask)

        [ep, ef, rt, ib] = ProbabilisticSequentialMatrixFactorizer(
            Y,
            C,
            X,
            d,
            n,
            r,
            M,
            missMask,
            rho,
            V,
            Q,
            R,
            P,
            sig,
            Iter,
            YorigInt,
            Einit,
        )

        # Only the values after the final iteration are recorded.
        errors_predict.append(ep[:, Iter].item())
        errors_full.append(ef[:, Iter].item())
        runtimes.append(rt[:, Iter].item())
        inside_bars.append(ib)

        log(
            "[%s] Finished step %04i of %04i"
            % (dt.datetime.now().strftime("%c"), i + 1, args.repeats)
        )

    params = {
        "r": r,
        "sig": sig,
        "rho": rho,
        "v": v,
        "q": q,
        "p": p,
        "Iter": Iter,
    }
    hashes = {"Y": Y_hashes, "C": C_hashes, "X": X_hashes}
    results = {
        "error_predict": errors_predict,
        "error_full": errors_full,
        "runtime": runtimes,
        "inside_sig": inside_bars,
    }
    # missRatio is the value returned by the last repetition.
    output = prepare_output(
        args.input,
        __file__,
        params,
        hashes,
        results,
        seed,
        args.percentage,
        missRatio,
        "PSMF",
    )
    dump_output(output, args.output)
def main():
    """Run repeated BPMF experiments on a CSV data set and dump the results.

    The input matrix is loaded from ``args.input``; for each of
    ``args.repeats`` repetitions a fresh missingness pattern and random
    initial factors are drawn and ``bpmf`` is run. A
    ``numpy.linalg.LinAlgError`` raised by ``bpmf`` is recorded as NaN for
    that repetition instead of aborting the experiment.
    """
    args = parse_args()

    # Set the seed if given, otherwise draw one and print it out.
    # An explicit ``is not None`` check is used because ``args.seed or ...``
    # would silently discard a user-supplied seed of 0.
    # NOTE(review): assumes parse_args leaves ``seed`` as None when the
    # option is not supplied -- confirm against the argument parser.
    if args.seed is not None:
        seed = args.seed
    else:
        seed = np.random.randint(10000)
    print("Using seed: %r" % seed)
    np.random.seed(seed)

    # Load the data
    Yorig = np.genfromtxt(args.input, delimiter=",")

    # Create a copy with missings set to zero
    YorigInt = np.copy(Yorig)
    YorigInt[np.isnan(YorigInt)] = 0

    # With --fancy a progress-bar range is used and per-step logging is muted.
    _range = trange if args.fancy else range
    log = print if not args.fancy else lambda *a, **kw: None

    # Extract dimensions and set latent dimensionality
    d, n = Yorig.shape
    r = 10
    Iter = 2

    # Initialize arrays to keep track of quantities of interest
    errors_predict = []
    errors_full = []
    runtimes = []
    Y_hashes = []
    C_hashes = []
    X_hashes = []

    for i in _range(args.repeats):
        # Create the missing mask (missMask) and its inverse (M)
        Ymiss = np.copy(Yorig)
        missRatio, missMask = prepare_missing(Ymiss, args.percentage / 100)
        M = np.array(np.invert(np.isnan(Ymiss)), dtype=int)

        # In the data we work with, set missing to 0
        Y = np.copy(Ymiss)
        Y[np.isnan(Y)] = 0

        log("[%04i/%04i]" % (i + 1, args.repeats))

        C = np.random.rand(d, r)
        X = np.random.rand(r, n)

        # store hash of matrices; used to ensure they're the same between
        # scripts
        Y_hashes.append(matrix_hash(Y))
        C_hashes.append(matrix_hash(C))
        X_hashes.append(matrix_hash(X))

        YrecInit = C @ X
        Einit = RMSEM(YrecInit, YorigInt, missMask)

        # Since BPMF uses random numbers internally, we wouldn't get the same
        # C,X as the other methods without saving/resetting state.
        rand_state = np.random.get_state()
        try:
            [ep, ef, rt] = bpmf(
                Y, C, X, d, n, r, M, missMask, YorigInt, Einit, epochs=Iter
            )
        except np.linalg.LinAlgError:
            # A failed factorization is recorded as NaN for this repetition.
            errors_predict.append(float("nan"))
            errors_full.append(float("nan"))
            runtimes.append(float("nan"))
        else:
            # Only the values after the final epoch are recorded.
            errors_predict.append(ep[:, Iter].item())
            errors_full.append(ef[:, Iter].item())
            runtimes.append(rt[:, Iter].item())
        np.random.set_state(rand_state)

    params = {"Iter": Iter, "r": r}
    hashes = {"Y": Y_hashes, "C": C_hashes, "X": X_hashes}
    results = {
        "error_predict": errors_predict,
        "error_full": errors_full,
        "runtime": runtimes,
        "inside_sig": None,  # Not returned in the model
    }
    # missRatio is the value returned by the last repetition.
    output = prepare_output(
        args.input,
        __file__,
        params,
        hashes,
        results,
        seed,
        args.percentage,
        missRatio,
        "BPMF",
    )
    dump_output(output, args.output)
def _bpmf_sample_hyperparams(samples, WI, b0, df, mu0):
    """Draw a (precision, mean) pair for one factor matrix given its rows.

    ``samples`` holds one latent vector per row. The precision is drawn with
    ``wishart.rvs`` and the mean is a Gaussian draw around the shrunk sample
    mean. Random numbers are consumed in that order.
    """
    N = samples.shape[0]
    num_feat = mu0.shape[0]
    x_bar = np.mean(samples, axis=0).reshape(num_feat, 1)
    S_bar = np.cov(samples, rowvar=False)
    dev = mu0 - x_bar
    WI_post = np.linalg.inv(
        np.linalg.inv(WI) + N * S_bar + N * b0 * dev @ dev.T / (b0 + N)
    )
    # Remove floating-point asymmetry before handing the scale to scipy.
    WI_post = (WI_post + WI_post.T) / 2
    alpha = wishart.rvs(df + N, WI_post)
    mu_post = (b0 * mu0 + N * x_bar) / (b0 + N)
    # np.linalg.cholesky returns the LOWER factor L with L @ L.T == A, so
    # L @ z already has covariance A; no transpose is needed.
    chol = np.linalg.cholesky(np.linalg.inv((b0 + N) * alpha))
    mu = chol @ np.random.randn(num_feat, 1) + mu_post
    return alpha, mu


def _bpmf_resample_rows(target, source, present, ratings, alpha, mu, beta):
    """Gibbs-resample every row of ``target`` in place.

    Row ``k`` of ``target`` is redrawn from a Gaussian conditioned on the rows
    of ``source`` selected by ``present[:, k]`` and the matching entries of
    ``ratings[:, k]``.
    """
    num_feat = target.shape[1]
    prior_term = alpha @ mu
    for k in range(target.shape[0]):
        sel = present[:, k]
        MM = source[sel, :]
        rr = ratings[sel, k][:, np.newaxis]  # column vector
        covar = np.linalg.inv(alpha + beta * MM.T @ MM)
        mean = covar @ (beta * MM.T @ rr + prior_term)
        # Lower Cholesky factor: chol @ chol.T == covar.
        chol = np.linalg.cholesky(covar)
        target[k, :] = (chol @ np.random.randn(num_feat, 1) + mean).squeeze()


def bpmf(
    Y,
    C,
    X,
    num_p,
    num_m,
    num_feat,
    M,
    Mmiss,
    YorigInt,
    Einit,
    epochs=50,
    beta=2,
):
    """Bayesian probabilistic matrix factorization via Gibbs sampling.

    The observed entries of ``Y`` (where ``M == 1``) are standardized, a PMF
    solution is used as the starting point, and ``epochs`` rounds of
    hyperparameter sampling followed by two Gibbs sweeps are performed. After
    every epoch the running average of the probe predictions is mapped back
    to the original scale and scored with ``RMSEM``.

    Parameters
    ----------
    Y : (num_p, num_m) array of observations.
    C, X : initial factors, forwarded to ``pmf``.
    num_p, num_m, num_feat : matrix dimensions and latent dimensionality.
    M : (num_p, num_m) mask, 1 where an entry is present.
    Mmiss : (num_p, num_m) mask; entries in (0.95, 1.05) form the probe set.
    YorigInt : reference data used for the probe values and for ``RMSEM``.
    Einit : value stored in slot 0 of both error traces.
    epochs : number of sampling epochs (also passed to ``pmf``).
    beta : observation precision used in the Gibbs conditionals.

    Returns
    -------
    (Epred, Efull, RunTime), each a ``(1, epochs + 1)`` array. ``Epred`` is
    NaN after slot 0 because BPMF makes no online predictions.

    Raises
    ------
    numpy.linalg.LinAlgError
        If one of the matrix inversions / Cholesky factorizations fails.
    """
    assert (num_p, num_m) == Y.shape

    Epred = np.zeros([1, epochs + 1])
    Efull = np.copy(Epred)
    Epred[:, 0] = Einit
    Efull[:, 0] = Einit
    RunTime = np.zeros([1, epochs + 1])

    # parameters of Inv-Wishart distribution
    WI_u = np.eye(num_feat)
    b0_u = 2
    df_u = num_feat
    mu0_u = np.zeros((num_feat, 1))

    WI_m = np.eye(num_feat)
    b0_m = 2
    df_m = num_feat
    mu0_m = np.zeros((num_feat, 1))

    # Triplets {dimension_idx, time_idx, measurement}, in row-major order.
    # M = 1 means present.
    obs_idx = np.argwhere(M == 1)
    train_vec = np.column_stack(
        (obs_idx, Y[obs_idx[:, 0], obs_idx[:, 1]])
    ).astype(float)
    probe_idx = np.argwhere((Mmiss > 0.95) & (Mmiss < 1.05))
    probe_vec = np.column_stack(
        (probe_idx, YorigInt[probe_idx[:, 0], probe_idx[:, 1]])
    ).astype(float)

    # NOTE we standardize and center the measurements, otherwise the algorithm
    # can diverge. We undo this during prediction, of course.
    old_mean = np.mean(train_vec[:, 2])
    old_std = np.std(train_vec[:, 2])
    if old_std == 0:
        # Constant data: centering alone is enough, avoid dividing by zero.
        old_std = 1.0
    train_vec[:, 2] = (train_vec[:, 2] - old_mean) / old_std
    probe_vec[:, 2] = (probe_vec[:, 2] - old_mean) / old_std

    RunTimeStart = time.time()

    mean_rating = train_vec[:, 2].mean()

    # Dense matrix of standardized observations (zero where unobserved).
    count = np.zeros((num_p, num_m))
    count[obs_idx[:, 0], obs_idx[:, 1]] = train_vec[:, 2]
    centered = count - mean_rating
    present = M > 0  # select those that are present

    # Use the PMF solution as initial. We use the same number of epochs as
    # given to this function.
    # NOTE(review): this call expects a ``pmf`` that returns the two factor
    # matrices (time/"movie" factors first) -- confirm which ``pmf`` is in
    # scope where this function lives.
    w1_M1_sample, w1_P1_sample = pmf(
        train_vec, C, X, num_p, num_m, num_feat, epochs=epochs
    )

    # The hyperparameters (mu_*, alpha_*) are always sampled at the start of
    # each epoch before they are read, so no initial point estimate of them
    # is computed here.

    probe_rat_all = pred(w1_M1_sample, w1_P1_sample, probe_vec, mean_rating)
    counter_prob = 1

    probe_p = probe_idx[:, 0]
    probe_m = probe_idx[:, 1]

    for epoch in range(epochs):
        # Sample from movie hyperparams
        alpha_m, mu_m = _bpmf_sample_hyperparams(
            w1_M1_sample, WI_m, b0_m, df_m, mu0_m
        )

        # Sample from user hyperparams
        alpha_u, mu_u = _bpmf_sample_hyperparams(
            w1_P1_sample, WI_u, b0_u, df_u, mu0_u
        )

        for gibbs in range(2):
            # Infer posterior distribution over all movie feature vectors
            _bpmf_resample_rows(
                w1_M1_sample, w1_P1_sample, present, centered,
                alpha_m, mu_m, beta,
            )
            # Infer posterior distribution over all user feature vectors
            _bpmf_resample_rows(
                w1_P1_sample, w1_M1_sample, present.T, centered.T,
                alpha_u, mu_u, beta,
            )

        probe_rat = pred(w1_M1_sample, w1_P1_sample, probe_vec, mean_rating)
        probe_rat_all = (counter_prob * probe_rat_all + probe_rat) / (
            counter_prob + 1
        )
        counter_prob += 1

        # Reconstruct Yrec2. Note that the original BPMF code uses
        # probe_rat_all as the predictor, so we do the same
        pred_out = probe_rat_all * old_std + old_mean
        Yrec2 = np.zeros_like(Y)
        Yrec2[probe_p, probe_m] = np.ravel(pred_out)

        Epred[:, epoch + 1] = np.nan  # BPMF doesn't do online predictions
        Efull[:, epoch + 1] = RMSEM(Yrec2, YorigInt, Mmiss)
        RunTime[:, epoch + 1] = time.time() - RunTimeStart

    return Epred, Efull, RunTime
def pmf(
    Y,
    C,
    X,
    num_p,
    num_m,
    num_feat,
    M,  # (1 - M) is all missing
    Mmiss,  # Mmiss is _additional_ missing (which we can evaluate)
    YorigInt,
    Einit,
    epochs=50,
    epsilon=50,
    lmd=0.01,
    momentum=0.8,
    num_batches=9,
):
    """Probabilistic matrix factorization trained by mini-batch SGD with momentum.

    The observed entries of ``Y`` (where ``M == 1``) are standardized and the
    factors are fitted with ``epochs`` passes of ``num_batches`` mini-batches
    each. After every epoch the probe entries (``Mmiss`` in (0.95, 1.05)) are
    predicted, mapped back to the original scale and scored with ``RMSEM``.

    Parameters
    ----------
    Y : (num_p, num_m) array of observations.
    C : (num_p, num_feat) initial row factors (scaled by 0.1 before use).
    X : (num_feat, num_m) initial column factors (transposed, scaled by 0.1).
    num_p, num_m, num_feat : matrix dimensions and latent dimensionality.
    M : (num_p, num_m) mask, 1 where an entry is present.
    Mmiss : (num_p, num_m) mask marking the held-out probe entries.
    YorigInt : reference data forwarded to ``RMSEM``.
    Einit : value stored in slot 0 of both error traces.
    epochs : number of passes over the training triplets.
    epsilon : learning rate.
    lmd : L2 regularization weight.
    momentum : momentum coefficient for the increments.
    num_batches : number of mini-batches per epoch.

    Returns
    -------
    (Epred, Efull, RunTime), each a ``(1, epochs + 1)`` array. ``Epred`` is
    NaN after slot 0 because PMF makes no online predictions.
    """
    assert (num_p, num_m) == Y.shape

    Epred = np.zeros([1, epochs + 1])
    Efull = np.copy(Epred)
    Epred[:, 0] = Einit
    Efull[:, 0] = Einit
    RunTime = np.zeros([1, epochs + 1])

    # NOTE: This is not the exact same initialization as the other algorithms,
    # but otherwise PMF doesn't converge
    w1_M1 = 0.1 * X.T
    w1_P1 = 0.1 * C
    w1_M1_inc = np.zeros_like(w1_M1)
    w1_P1_inc = np.zeros_like(w1_P1)

    # Triplets {dimension_idx, time_idx, measurement}, in row-major order.
    # M = 1 means present.
    obs_idx = np.argwhere(M == 1)
    train_vec = np.column_stack(
        (obs_idx, Y[obs_idx[:, 0], obs_idx[:, 1]])
    ).astype(float)
    # Only the positions of the probe entries are needed for prediction.
    probe_idx = np.argwhere((Mmiss > 0.95) & (Mmiss < 1.05))
    probe_p = probe_idx[:, 0]
    probe_m = probe_idx[:, 1]

    # NOTE we standardize and center the measurements, otherwise the algorithm
    # can diverge. We undo this during prediction, of course.
    old_mean = np.mean(train_vec[:, 2])
    old_std = np.std(train_vec[:, 2])
    if old_std == 0:
        # Constant data: centering alone is enough, avoid dividing by zero.
        old_std = 1.0
    train_vec[:, 2] = (train_vec[:, 2] - old_mean) / old_std

    RunTimeStart = time.time()

    mean_rating = train_vec[:, 2].mean()

    for epoch in range(epochs):
        train_vec = np.random.permutation(train_vec)

        # Split once per epoch instead of re-splitting for every batch.
        for batch_arr in np.array_split(train_vec, num_batches):
            N = batch_arr.shape[0]
            if N == 0:
                # More batches than triplets: nothing to learn from here and
                # dividing by N below would poison the factors with NaN.
                continue

            aa_p = batch_arr[:, 0].astype(int)
            aa_m = batch_arr[:, 1].astype(int)

            # Default prediction is the mean rating
            rating = batch_arr[:, 2] - mean_rating

            # Compute predictions
            pred_out = np.sum(w1_M1[aa_m, :] * w1_P1[aa_p, :], axis=1)

            # Compute gradients (the residual column broadcasts over features)
            resid = 2 * (pred_out - rating)[:, np.newaxis]
            Ix_m = resid * w1_P1[aa_p, :] + lmd * w1_M1[aa_m, :]
            Ix_p = resid * w1_M1[aa_m, :] + lmd * w1_P1[aa_p, :]

            # Accumulate per-row gradients; np.add.at handles repeated
            # indices, which plain fancy-index assignment would not.
            dw1_M1 = np.zeros((num_m, num_feat))
            dw1_P1 = np.zeros((num_p, num_feat))
            np.add.at(dw1_M1, aa_m, Ix_m)
            np.add.at(dw1_P1, aa_p, Ix_p)

            # Update movie and user features
            w1_M1_inc = momentum * w1_M1_inc + epsilon * dw1_M1 / N
            w1_M1 = w1_M1 - w1_M1_inc

            w1_P1_inc = momentum * w1_P1_inc + epsilon * dw1_P1 / N
            w1_P1 = w1_P1 - w1_P1_inc

        # Compute predictions on test set
        pred_out = (
            np.sum(w1_M1[probe_m, :] * w1_P1[probe_p, :], axis=1) + mean_rating
        )
        pred_out = pred_out * old_std + old_mean

        # reconstruct Yrec2
        Yrec2 = np.zeros_like(Y)
        Yrec2[probe_p, probe_m] = pred_out

        Epred[:, epoch + 1] = np.nan  # PMF doesn't do online predictions
        Efull[:, epoch + 1] = RMSEM(Yrec2, YorigInt, Mmiss)
        RunTime[:, epoch + 1] = time.time() - RunTimeStart

    return Epred, Efull, RunTime
def robust_PSMF(
    Y,
    C,
    X,
    d,
    n,
    r,
    M,
    Mmiss,
    V,
    Q0,
    R0,
    P,
    lambda0,
    sig,
    Iter,
    YorigInt,
    Einit,
):
    """Run ``Iter`` passes of the robust PSMF recursion over the columns of Y.

    Compared with the plain recursion, the covariance updates are rescaled at
    every step by factors built from the innovation (``omega`` for ``P``,
    ``Q`` and ``R``; ``phi`` for ``V``), and the degrees-of-freedom style
    counter ``lmd`` grows by ``d`` per column. ``Q``, ``R`` and ``lmd`` are
    reset to ``Q0``, ``R0`` and ``lambda0`` at the start of each pass.

    Parameters
    ----------
    Y : (d, n) observations.
    C : (d, r) dictionary; rebound locally.
    X : (r, n) latent coefficients; columns are overwritten **in place**.
    d, n : dimensions used to allocate the reconstruction buffers.
    r : accepted for signature compatibility; not used here.
    M : (d, n) mask whose column ``t`` forms the diagonal selection matrix.
    Mmiss : mask forwarded to ``RMSEM`` / ``compute_number_inside_bars``.
    V, P : covariance-type matrices carried across columns and passes.
    Q0, R0, lambda0 : per-pass starting values of ``Q``, ``R`` and ``lmd``.
        ``R`` is assumed diagonal (only its diagonal is read).
    sig : multiplier of the per-dimension standard deviation for the bands.
    Iter : number of passes.
    YorigInt, Einit : reference data and initial error value.

    Returns
    -------
    (Epred, Efull, RunTime, InsideBars); the first three are
    ``(1, Iter + 1)`` arrays.
    """
    trace_shape = (1, Iter + 1)
    Epred = np.zeros(trace_shape)
    Efull = np.zeros(trace_shape)
    Epred[:, 0] = Einit
    Efull[:, 0] = Einit
    RunTime = np.zeros(trace_shape)

    Yrec = np.zeros((d, n))
    YrecL = np.zeros((d, n))
    YrecH = np.zeros((d, n))
    Id = np.identity(d)

    RunTimeStart = time.time()
    for sweep in range(Iter):
        # Every pass restarts from the supplied noise levels.
        Q, R, lmd = Q0, R0, lambda0

        for t in range(n):
            mask_t = M[:, t]
            Mk = np.diag(mask_t)
            CM = Mk @ C
            # Previous latent column; wraps to the last column when t == 0.
            Xp = X[:, [(t - 1) % n]]
            PP = P + Q

            y_pred = C @ Xp
            Yrec[:, [t]] = y_pred

            # Assumes R is diagonal. Otherwise: MRM = Mk @ R @ Mk.T
            MRM = np.diag(mask_t * np.diag(R))
            XVX = Xp.T @ V @ Xp
            PPCM = PP @ CM.T
            CMPPCM = CM @ PPCM

            # Replace Mk in Rbar = MRM + XVX * Mk by Id, otherwise singular
            Rbar = MRM + XVX * Id
            CPinv = compute_Sinv(CM, PP, Rbar)

            innovation = Y[:, [t]] - CM @ Xp
            gain = PPCM @ CPinv
            X[:, [t]] = Xp + gain @ innovation

            denom = lmd + d
            omega = (lmd + innovation.T @ CPinv @ innovation) / denom
            P = omega * (PP - gain @ CM @ PP)

            eta_k = np.trace(MRM + CMPPCM) / d
            Nk = XVX + eta_k
            C = C + innovation @ Xp.T @ V.T / Nk

            U = XVX * Mk + eta_k * Id
            u_diag = np.diag(U)
            Ui = np.diag(1.0 / u_diag)
            phi = (lmd + innovation.T @ Ui @ innovation) / denom
            V = phi * (V - (V @ Xp @ Xp.T @ V) / Nk)

            # We approximate the boundaries of the interval using (-sig*std,
            # +sig*std), based on the normal distribution. This can be shown
            # to have a negligible effect on performance compared to the exact
            # version (kept below for reference), but is significantly faster.
            half_width = sig * np.sqrt(u_diag).reshape(d, 1)
            YrecL[:, [t]] = y_pred - half_width
            YrecH[:, [t]] = y_pred + half_width

            # Exact version, for reference (AREA can be precomputed):
            # AREA = scipy.stats.norm().cdf(sig) - scipy.stats.norm().cdf(-sig)
            # YrecL[:, [t]], YrecH[:, [t]] = scipy.stats.t.interval(
            #     AREA,
            #     lmd,
            #     loc=Yrec[:, [t]],
            #     scale=np.sqrt(np.diag(U)).reshape(d, 1),
            # )

            Q = omega * Q
            R = omega * R
            lmd = denom

        Yrec2 = C @ X
        Epred[:, sweep + 1] = RMSEM(Yrec, YorigInt, Mmiss)
        Efull[:, sweep + 1] = RMSEM(Yrec2, YorigInt, Mmiss)
        RunTime[:, sweep + 1] = time.time() - RunTimeStart

    InsideBars = compute_number_inside_bars(
        Mmiss, d, n, YorigInt, YrecL, YrecH
    )
    return Epred, Efull, RunTime, InsideBars