def decompose(train_data, vad_data, weight=20, num_factors=100, num_iters=50,
              lam=1e-1, batch_size=1000):
    """Factorize ``train_data`` with ``content_wmf`` and return the factors.

    A confidence matrix is built from ``train_data`` with
    ``content_wmf.linear_surplus_confidence_matrix`` and then handed to
    ``content_wmf.factorize``.

    Parameters
    ----------
    train_data
        Training data passed unchanged to
        ``content_wmf.linear_surplus_confidence_matrix``.
        (Presumably a sparse user-by-item matrix -- TODO confirm.)
    vad_data
        Validation data, forwarded to ``content_wmf.factorize`` as
        ``vad_data``.
    weight
        Passed as ``alpha`` to ``linear_surplus_confidence_matrix``.
    num_factors
        Number of factors requested from ``content_wmf.factorize``.
    num_iters
        Forwarded to ``content_wmf.factorize`` as ``num_iters``.
    lam
        Regularization value used for both ``lambda_U_reg`` and
        ``lambda_V_reg``.
    batch_size
        Forwarded to ``content_wmf.factorize`` as ``batch_size``.

    Returns
    -------
    tuple
        ``(U, V)``: the first two values returned by
        ``content_wmf.factorize``.  The third returned value (the original
        code named it ``vad_ndcg``) is discarded.
    """
    # A single parenthesized string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('********************** Factorizing using Matrix factorization **********************************')

    confidence = content_wmf.linear_surplus_confidence_matrix(
        train_data, alpha=weight)

    # The seed, dtype, init_std and single-job setting are fixed here, exactly
    # as in the original call.
    U, V, _ = content_wmf.factorize(
        confidence,
        num_factors,
        vad_data=vad_data,
        num_iters=num_iters,
        init_std=0.01,
        lambda_U_reg=lam,
        lambda_V_reg=lam,
        dtype='float32',
        random_state=98765,
        verbose=True,
        recompute_factors=batched_inv_joblib.recompute_factors_batched,
        batch_size=batch_size,
        n_jobs=1,
    )

    # NOTE: an unreachable block used to follow this return.  It looped over
    # candidate alpha values (only [20] was active), kept the factors with the
    # highest validation NDCG and saved them to 'Baseline1_MF_K100.npz'.  It
    # could never execute and read an uninitialized ``best_ndcg``, so it was
    # removed.
    return U, V