def run(self, type, n_jobs=8, n_components=100, max_iter=50, vad_K=100):
    """Grid-search the mixing weights (mu_*) for one embedding model and
    save the best-performing factors.

    For the model selected by ``type``, sweeps the mu weights over a fixed
    grid, refits from scratch for each setting, evaluates with
    ``self.local_eval`` (which compares NDCG@10 against the running best),
    and finally writes the best (U, V) factor matrices to an .npz file in
    the working directory.

    :param type: model selector -- one of 'cofactor', 'model2', 'model3',
        'model4', 'model5', 'model6', 'model7'.
        NOTE(review): shadows the builtin ``type``; kept for caller
        compatibility.
    :param n_jobs: worker count passed to the model constructors.
    :param n_components: latent dimensionality K of U and V.
    :param max_iter: maximum number of training iterations per fit.
    :param vad_K: ranking cutoff used for validation inside ``fit``.
    """
    # Shared regularization strengths and confidence weights for all models.
    lam_alpha = lam_beta = 1e-1
    lam_theta = lam_gamma = 1e-1
    lam_gamma_p = 1e-1
    lam_gamma_n = 1e-1
    lam_theta_p = 1e-1
    lam_theta_n = 1e-1
    c0 = 1.
    c1 = 20.
    # print 'lam_alpha:', lam_alpha
    # print 'lam_beta:', lam_beta
    # print 'lam_theta:', lam_theta
    # print 'lam_gamma:', lam_gamma
    # print 'lam_gamma_p:', lam_gamma_p
    # print 'lam_gamma_n:', lam_gamma_n
    # Best result found so far across the grid search.
    best_ndcg_10 = 0.0
    best_U = None
    best_V = None
    if type == 'cofactor':
        print 'modified cofactor model, using weight for the embedding'
        best_mu = 1.0
        # Sweep the single embedding weight mu over [0.1 .. 1.0].
        for mu in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            print 'mu = %.2f' % (mu)
            print self.save_dir
            # Clear old checkpoints so the iteration count below is valid.
            self.clean_savedir()
            coder = cofactor.CoFacto(
                mu=mu, n_components=n_components, max_iter=max_iter,
                batch_size=1000, init_std=0.01, dtype=np.float32,
                n_jobs=n_jobs, random_state=98765, save_params=True,
                save_dir=self.save_dir, early_stopping=True, verbose=True,
                lambda_alpha=lam_alpha, lambda_theta=lam_theta,
                lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
            coder.fit(self.train_data, self.X_sppmi,
                      vad_data=self.vad_data, batch_users=300, k=vad_K,
                      clear_invalid=False, n_jobs=4)
            # Binarize test interactions before evaluation.
            self.test_data.data = np.ones_like(self.test_data.data)
            # One checkpoint is written per iteration; load the last one.
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (is_better, new_U, new_V,
             new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
            if is_better:
                best_ndcg_10 = new_ndcg_10
                best_U = new_U
                best_V = new_V
                best_mu = mu
        print 'Best with mu = %.2f' % (best_mu)
        model_out_name = 'ModifiedCofactor_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
    if type == 'model2':
        print 'positive project embedding + positive user embedding'
        best_mu_u = 0.0
        # Sweep user-embedding weight; project weight is the complement.
        for mu_u in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
            print 'mu_u = %.2f , mu_p = %.2f' % (mu_u, (1.0 - mu_u))
            print self.save_dir
            self.clean_savedir()
            coder = mymodel2.ParallelMFPosUserPosProjectEmbedding(
                mu_u=mu_u, mu_p=1.0 - mu_u, n_components=n_components,
                max_iter=max_iter, batch_size=1000, init_std=0.01,
                dtype=np.float32, n_jobs=n_jobs, random_state=98765,
                save_params=True, save_dir=self.save_dir,
                early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
                lambda_theta=lam_theta, lambda_beta=lam_beta,
                lambda_gamma=lam_gamma, c0=c0, c1=c1)
            coder.fit(self.train_data, self.X_sppmi, self.Y_sppmi,
                      vad_data=self.vad_data, batch_users=300, k=vad_K,
                      clear_invalid=False, n_jobs=4)
            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (is_better, new_U, new_V,
             new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
            if is_better:
                best_ndcg_10 = new_ndcg_10
                best_U = new_U
                best_V = new_V
                best_mu_u = mu_u
        print 'Best with mu_u = %.2f and mu_p = %.2f' % (best_mu_u,
                                                         1.0 - best_mu_u)
        model_out_name = 'Model2_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
    if type == 'model3':
        print 'positive project embedding + negative project embedding'
        best_mu_p_p = 0.0
        # Sweep positive-project weight; negative weight is the complement.
        for mu_p_p in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
            print 'mu_p_p = %.2f , mu_p_n = %.2f' % (mu_p_p, (1.0 - mu_p_p))
            print self.save_dir
            self.clean_savedir()
            coder = mymodel3.ParallelMFPositiveNegativeProjectEmbedding(
                mu_p_p=mu_p_p, mu_p_n=1.0 - mu_p_p,
                n_components=n_components, max_iter=max_iter,
                batch_size=1000, init_std=0.01, dtype=np.float32,
                n_jobs=n_jobs, random_state=98765, save_params=True,
                save_dir=self.save_dir, early_stopping=True, verbose=True,
                lambda_alpha=lam_alpha, lambda_theta=lam_theta,
                lambda_beta=lam_beta, lambda_gamma_p=lam_gamma_p,
                lambda_gamma_n=lam_gamma_n, c0=c0, c1=c1)
            # lambda_gamma = 1e-1
            coder.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                      vad_data=self.vad_data, batch_users=300, k=vad_K,
                      clear_invalid=False, n_jobs=4)
            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (is_better, new_U, new_V,
             new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
            if is_better:
                best_ndcg_10 = new_ndcg_10
                best_U = new_U
                best_V = new_V
                best_mu_p_p = mu_p_p
        print 'Best with mu_p_p = %.2f and mu_p_n = %.2f' % (
            best_mu_p_p, 1.0 - best_mu_p_p)
        model_out_name = 'Model3_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
    if type == 'model4':
        print 'positive user embedding + negative user embedding'
        best_mu_u_p = 0.1
        # Sweep positive-user weight; negative weight is the complement.
        for mu_u_p in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
            print 'mu_u_p = %.2f , mu_u_n = %.2f' % (mu_u_p, (1.0 - mu_u_p))
            print self.save_dir
            self.clean_savedir()
            coder = mymodel4.ParallelMFPositiveNegativeUserEmbedding(
                mu_u_p=mu_u_p, mu_u_n=1.0 - mu_u_p,
                n_components=n_components, max_iter=max_iter,
                batch_size=1000, init_std=0.01, dtype=np.float32,
                n_jobs=n_jobs, random_state=98765, save_params=True,
                save_dir=self.save_dir, early_stopping=True, verbose=True,
                lambda_alpha=lam_alpha, lambda_theta_p=lam_theta_p,
                lambda_theta_n=lam_theta_n, lambda_beta=lam_beta,
                c0=c0, c1=c1)
            coder.fit(self.train_data, self.Y_sppmi, self.Y_neg_sppmi,
                      vad_data=self.vad_data, batch_users=300, k=vad_K,
                      clear_invalid=False, n_jobs=4)
            self.test_data.data = np.ones_like(self.test_data.data)
            n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
            params = np.load(
                os.path.join(
                    self.save_dir,
                    'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
            U, V = params['U'], params['V']
            (is_better, new_U, new_V,
             new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
            if is_better:
                best_ndcg_10 = new_ndcg_10
                best_U = new_U
                best_V = new_V
                best_mu_u_p = mu_u_p
        print 'Best with mu_u_p = %.2f and mu_u_n = %.2f' % (best_mu_u_p,
                                                             1.0 - best_mu_u_p)
        model_out_name = 'Model4_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
    if type == 'model5':
        print 'positive and negative project embedding + positive user embedding'
        best_mu_u_p = 0.0
        best_mu_p_p = 0.0
        best_mu_p_n = 0.0
        count = 0
        # 2-D sweep over (mu_u_p, mu_p_p); mu_p_n is the remainder so the
        # three weights always sum to 1; skip degenerate remainders.
        for mu_u_p in np.arange(0.1, 0.9, 0.1):
            for mu_p_p in np.arange(0.1, 1.0 - mu_u_p, 0.1):
                mu_p_n = 1.0 - mu_u_p - mu_p_p
                if mu_p_n <= 0.001:
                    continue
                else:
                    count += 1
                print 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
                    mu_u_p, mu_p_p, mu_p_n)
                print self.save_dir
                self.clean_savedir()
                coder = mymodel5.ParallelMFPosUserPosNegProjectEmbedding(
                    mu_u_p=mu_u_p, mu_p_p=mu_p_p, mu_p_n=mu_p_n,
                    n_components=n_components, max_iter=max_iter,
                    batch_size=1000, init_std=0.01, dtype=np.float32,
                    n_jobs=n_jobs, random_state=98765, save_params=True,
                    save_dir=self.save_dir, early_stopping=True,
                    verbose=True, lambda_alpha=lam_alpha,
                    lambda_theta_p=lam_theta_p, lambda_beta=lam_beta,
                    lambda_gamma_p=lam_gamma_p, lambda_gamma_n=lam_gamma_n,
                    c0=c0, c1=c1)
                coder.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                          self.Y_sppmi, vad_data=self.vad_data,
                          batch_users=300, k=vad_K, clear_invalid=False,
                          n_jobs=4)
                self.test_data.data = np.ones_like(self.test_data.data)
                n_params = len(
                    glob.glob(os.path.join(self.save_dir, '*.npz')))
                params = np.load(
                    os.path.join(
                        self.save_dir,
                        'CoFacto_K%d_iter%d.npz' % (n_components,
                                                    n_params - 1)))
                U, V = params['U'], params['V']
                (is_better, new_U, new_V,
                 new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
                if is_better:
                    best_ndcg_10 = new_ndcg_10
                    best_U = new_U
                    best_V = new_V
                    best_mu_u_p = mu_u_p
                    best_mu_p_p = mu_p_p
                    best_mu_p_n = mu_p_n
        print count, ' cases'
        print 'Best with mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
            best_mu_u_p, best_mu_p_p, best_mu_p_n)
        model_out_name = 'Model5_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
    if type == 'model6':
        print 'positive project embedding + positive + negative user embedding'
        best_mu_u_p = 0.0
        best_mu_u_n = 0.0
        best_mu_p_p = 0.0
        count = 0
        # 2-D sweep over (mu_u_p, mu_u_n); mu_p_p is the remainder.
        for mu_u_p in np.arange(0.1, 0.9, 0.1):
            for mu_u_n in np.arange(0.1, 1.0 - mu_u_p, 0.1):
                mu_p_p = 1.0 - mu_u_p - mu_u_n
                if mu_p_p <= 0.001:
                    continue
                else:
                    count += 1
                print 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f' % (
                    mu_u_p, mu_u_n, mu_p_p)
                print self.save_dir
                self.clean_savedir()
                coder = mymodel6.ParallelMFPosNegUserPosProjectEmbedding(
                    mu_u_p=mu_u_p, mu_u_n=mu_u_n, mu_p_p=mu_p_p,
                    n_components=n_components, max_iter=max_iter,
                    batch_size=1000, init_std=0.01, dtype=np.float32,
                    n_jobs=n_jobs, random_state=98765, save_params=True,
                    save_dir=self.save_dir, early_stopping=True,
                    verbose=True, lambda_alpha=lam_alpha,
                    lambda_theta_p=lam_theta_p, lambda_theta_n=lam_theta_n,
                    lambda_beta=lam_beta, lambda_gamma_p=lam_gamma_p,
                    c0=c0, c1=c1)
                coder.fit(self.train_data, self.X_sppmi, self.Y_sppmi,
                          self.Y_neg_sppmi, vad_data=self.vad_data,
                          batch_users=300, k=vad_K, clear_invalid=False,
                          n_jobs=4)
                self.test_data.data = np.ones_like(self.test_data.data)
                n_params = len(
                    glob.glob(os.path.join(self.save_dir, '*.npz')))
                params = np.load(
                    os.path.join(
                        self.save_dir,
                        'CoFacto_K%d_iter%d.npz' % (n_components,
                                                    n_params - 1)))
                U, V = params['U'], params['V']
                (is_better, new_U, new_V,
                 new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
                if is_better:
                    best_ndcg_10 = new_ndcg_10
                    best_U = new_U
                    best_V = new_V
                    best_mu_u_p = mu_u_p
                    best_mu_u_n = mu_u_n
                    best_mu_p_p = mu_p_p
        print count, ' cases'
        print 'Best with mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f' % (
            best_mu_u_p, best_mu_u_n, best_mu_p_p)
        model_out_name = 'Model6_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
    if type == 'model7':
        print 'positive + negative project embedding + positive + negative user embedding'
        best_mu_u_p = 0.0
        best_mu_u_n = 0.0
        best_mu_p_p = 0.0
        best_mu_p_n = 0.0
        count = 0
        # 3-D sweep over (mu_u_p, mu_u_n, mu_p_p); mu_p_n is the remainder.
        for mu_u_p in np.arange(0.1, 0.8, 0.1):
            for mu_u_n in np.arange(0.1, 0.9 - mu_u_p, 0.1):
                for mu_p_p in np.arange(0.1, 1.0 - mu_u_p - mu_u_n, 0.1):
                    mu_p_n = 1.0 - mu_u_p - mu_u_n - mu_p_p
                    if mu_p_n <= 0.001:
                        continue
                    else:
                        count += 1
                    print 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
                        mu_u_p, mu_u_n, mu_p_p, mu_p_n)
                    print self.save_dir
                    self.clean_savedir()
                    coder = mymodel7.ParallelMFPositiveNegativeUserProjectEmbedding(
                        mu_u_p=mu_u_p, mu_u_n=mu_u_n, mu_p_p=mu_p_p,
                        mu_p_n=mu_p_n, n_components=n_components,
                        max_iter=max_iter, batch_size=1000, init_std=0.01,
                        dtype=np.float32, n_jobs=n_jobs,
                        random_state=98765, save_params=True,
                        save_dir=self.save_dir, early_stopping=True,
                        verbose=True, lambda_alpha=lam_alpha,
                        lambda_theta_p=lam_theta_p,
                        lambda_theta_n=lam_theta_n, lambda_beta=lam_beta,
                        lambda_gamma_p=lam_gamma_p,
                        lambda_gamma_n=lam_gamma_n, c0=c0, c1=c1)
                    coder.fit(self.train_data, self.X_sppmi,
                              self.X_neg_sppmi, self.Y_sppmi,
                              self.Y_neg_sppmi, vad_data=self.vad_data,
                              batch_users=300, k=vad_K,
                              clear_invalid=False, n_jobs=4)
                    self.test_data.data = np.ones_like(
                        self.test_data.data)
                    n_params = len(
                        glob.glob(os.path.join(self.save_dir, '*.npz')))
                    params = np.load(
                        os.path.join(
                            self.save_dir,
                            'CoFacto_K%d_iter%d.npz' % (n_components,
                                                        n_params - 1)))
                    U, V = params['U'], params['V']
                    (is_better, new_U, new_V,
                     new_ndcg_10) = self.local_eval(U, V, best_ndcg_10)
                    if is_better:
                        best_ndcg_10 = new_ndcg_10
                        best_U = new_U
                        best_V = new_V
                        best_mu_u_p = mu_u_p
                        best_mu_u_n = mu_u_n
                        best_mu_p_p = mu_p_p
                        best_mu_p_n = mu_p_n
        print count, ' cases'
        print 'Best with mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
            best_mu_u_p, best_mu_u_n, best_mu_p_p, best_mu_p_n)
        model_out_name = 'Model7_K100_best_ndcd10_%.4f.npz' % (
            best_ndcg_10)
        np.savez(model_out_name, U=best_U, V=best_V)
def run(self, type, n_jobs = 16, n_components = 100, max_iter = 50, vad_K = 100, **kwargs):
    """Train one model (wmf / cofactor / rme) and return its metrics.

    Unlike the grid-search runner, this fits a single configuration taken
    from ``kwargs`` and returns ``(recall_all, ndcg_all, map_all)`` from
    ``self.eval``.

    NOTE(review): this second ``run`` definition would shadow any earlier
    one in the same class -- confirm intent against the full file.

    :param type: 'wmf', 'cofactor' or 'rme'; anything else exits the
        process. (Shadows the builtin ``type``; kept for compatibility.)
    :param kwargs: optional 'saved_model', 'lam', 'lam_emb',
        'ret_params_only', and the RME weights 'mu_p_p'/'mu_p_n'/'mu_u_p'.
    """
    saved_model = kwargs.get('saved_model', False)
    if saved_model:
        # NOTE(review): MODELS_DIR is created here but the save paths below
        # use constants.SAVED_MODLE_DIR -- verify they refer to the same dir.
        MODELS_DIR = 'MODELS'
        if not os.path.exists(MODELS_DIR):
            os.mkdir(MODELS_DIR)
    # Regularization: 'lam' for the MF factors, 'lam_emb' for embeddings.
    lam = kwargs.get('lam', 1e-1)
    lam_alpha = lam_beta = lam
    lam_emb = kwargs.get('lam_emb', lam)
    lam_theta = lam_gamma = lam_gamma_p = lam_gamma_n = lam_theta_p = lam_emb
    c0 = 1.
    c1 = 10.
    self.n_components, self.lam, self.lam_emb = n_components, lam, lam_emb
    print '*************************************lam = %.3f ******************************************' % lam
    print '*************************************lam embedding = %.3f ******************************************' % lam_emb
    if type == 'wmf':
        U, V = wmf.decompose(self.train_data, self.vad_data,
                             num_factors=n_components, lam=lam)
        (recall_all, ndcg_all, map_all) = self.eval(U, V)
        if saved_model:
            model_out_name = os.path.join(
                constants.SAVED_MODLE_DIR,
                'WMF_K%d_lambda%.4f.npz' % (n_components, lam))
            np.savez(model_out_name, U=U, V=V)
    elif type == 'cofactor':
        print 'cofactor model'
        print self.save_dir
        # Clear old checkpoints so the iteration count below is valid.
        self.clean_savedir()
        CoFacto = cofactor.CoFacto(
            n_components=n_components, max_iter=max_iter, batch_size=1000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta=lam_theta, lambda_beta=lam_beta,
            lambda_gamma=lam_gamma, c0=c0, c1=c1)
        CoFacto.fit(self.train_data, self.X_sppmi, vad_data=self.vad_data,
                    batch_users=3000, k=vad_K, clear_invalid=False,
                    n_jobs = 16)
        # Binarize test interactions before evaluation.
        self.test_data.data = np.ones_like(self.test_data.data)
        # One checkpoint per iteration; load the last one.
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall_all, ndcg_all, map_all) = self.eval(U, V)
        # Metrics at the largest cutoff (last entry of each list).
        recall100, ndcg100, map100 = recall_all[-1], ndcg_all[-1], map_all[-1]
        if saved_model:
            model_out_name = os.path.join(
                constants.SAVED_MODLE_DIR,
                'Cofactor_K%d_lambda%.4f.npz' % (n_components, lam))
            np.savez(model_out_name, U=U, V=V)
    elif type == 'rme':
        ret_params_only = bool(kwargs.get("ret_params_only", False))
        print 'positive and negative project embedding + positive user embedding'
        mu_p_p = float(kwargs.get('mu_p_p', 1.0))  # weight to indicate importance of liked item embeddings
        mu_p_n = float(kwargs.get('mu_p_n', 1.0))  # weight to indicate importance of disliked item embeddings
        mu_u_p = float(kwargs.get('mu_u_p', 1.0))  # weight to indicate importance of user embeddings
        print 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (mu_u_p,
                                                               mu_p_p,
                                                               mu_p_n)
        print self.save_dir
        self.clean_savedir()
        RME = ParallelRME(
            mu_u_p=mu_u_p, mu_p_p=mu_p_p, mu_p_n=mu_p_n,
            n_components=n_components, max_iter=max_iter, batch_size=3000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta_p=lam_theta_p, lambda_beta=lam_beta,
            lambda_gamma_p=lam_gamma_p, lambda_gamma_n=lam_gamma_n,
            c0=c0, c1=c1)
        RME.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                self.Y_sppmi, vad_data=self.vad_data, batch_users=3000,
                k=vad_K, clear_invalid=False, n_jobs = 15)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'RME_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        # Early exit for callers that only want the factors + NDCG.
        if (ret_params_only):
            return (U, V, self.cal_ndcg(U,V,K=vad_K))
        self.test_data.data = np.ones_like(self.test_data.data)
        (recall_all, ndcg_all, map_all) = self.eval(U, V)
        recall100, ndcg100, map100 = recall_all[-1], ndcg_all[-1], map_all[-1]
        if saved_model:
            model_out_name = os.path.join(
                constants.SAVED_MODLE_DIR,
                'RME_K%d_lambda%.4f.npz' % (n_components, lam))
            np.savez(model_out_name, U=U, V=V)
    else:
        print 'Please select model from: rme, cofactor, wmf'
        sys.exit(1)
        # NOTE(review): unreachable after sys.exit(1); kept as-is.
        U, V = None, None
    return (recall_all, ndcg_all, map_all)
def run_alone(self, type, n_jobs=8, n_components=100, max_iter=50, vad_K=100, **kwargs):
    """Fit a single model configuration (no grid search), evaluate it with
    ``self.local_alone_eval``, and save (U, V) to an .npz file whose name
    encodes the recall/NDCG/MAP@100 scores.

    :param type: model selector -- 'cofactor', 'model2', 'model3',
        'model4', 'mcf', 'model6', 'model7', 'separatedmodel2',
        'separatedmodel3' or 'separatedmcf'. (Shadows the builtin
        ``type``; kept for caller compatibility.)
    :param kwargs: optional mu weights ('mu_u_p', 'mu_u_n', 'mu_p_p',
        'mu_p_n'; a value of -1.0 means "use the complement so the weights
        sum to 1") and 'fold' (cross-validation fold tag for the filename).
    """
    # Fixed regularization strengths and confidence weights.
    lam_alpha = lam_beta = 1e-1
    lam_theta = lam_gamma = 1e-1
    lam_gamma_p = 1e-1
    lam_gamma_n = 1e-1
    lam_theta_p = 1e-1
    lam_theta_n = 1e-1
    c0 = 1.
    c1 = 20.
    # Fold tag prepended to the output filename ('' when no fold given).
    fold = kwargs.get('fold', -1)
    if fold == -1:
        fold = ''
    else:
        fold = 'fold%d_' % fold
    if type == 'cofactor':
        print 'cofactor model'
        print self.save_dir
        # Clear old checkpoints so the iteration count below is valid.
        self.clean_savedir()
        coder = cofactor.CoFacto(
            n_components=n_components, max_iter=max_iter, batch_size=1000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta=lam_theta, lambda_beta=lam_beta,
            lambda_gamma=lam_gamma, c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, vad_data=self.vad_data,
                  batch_users=300, k=vad_K, clear_invalid=False, n_jobs=4)
        # Binarize test interactions before evaluation.
        self.test_data.data = np.ones_like(self.test_data.data)
        # One checkpoint per iteration; load the last one.
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Cofactor_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'model2':
        print 'positive project embedding + positive user embedding'
        mu_p = float(kwargs.get('mu_p_p', 0.6))
        mu_u = float(kwargs.get('mu_u_p', -1.0))
        if mu_u == -1.0:
            mu_u = 1.0 - mu_p
        print 'mu_u = %.2f , mu_p = %.2f' % (mu_u, mu_p)
        print self.save_dir
        self.clean_savedir()
        # coder = mymodel2.MFPositiveUserProjectEmbedding(mu_u = mu_u, mu_p = mu_p,
        #     n_components=n_components, max_iter=max_iter, batch_size=1000, init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
        #     random_state=98765, save_params=True, save_dir=self.save_dir, early_stopping=True, verbose=True,
        #     lambda_alpha = lam_alpha, lambda_theta=lam_theta, lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
        coder = mymodel2.ParallelMFPosUserPosProjectEmbedding(
            mu_u=mu_u, mu_p=mu_p, n_components=n_components,
            max_iter=max_iter, batch_size=1000, init_std=0.01,
            dtype=np.float32, n_jobs=n_jobs, random_state=98765,
            save_params=True, save_dir=self.save_dir, early_stopping=True,
            verbose=True, lambda_alpha=lam_alpha, lambda_theta=lam_theta,
            lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, self.Y_sppmi,
                  vad_data=self.vad_data, batch_users=300, k=vad_K,
                  clear_invalid=False, n_jobs=4)
        self.test_data.data = np.ones_like(self.test_data.data)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Model2_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'model3':
        print 'positive project embedding + negative project embedding'
        mu_p_p = float(kwargs.get('mu_p_p', 0.4))
        mu_p_n = float(kwargs.get('mu_p_n', -1.0))
        if mu_p_n == -1.0:
            mu_p_n = 1.0 - mu_p_p
        print 'mu_p_p = %.2f , mu_p_n = %.2f' % (mu_p_p, mu_p_n)
        print self.save_dir
        self.clean_savedir()
        coder = mymodel3.ParallelMFPositiveNegativeProjectEmbedding(
            mu_p_p=mu_p_p, mu_p_n=mu_p_n, n_components=n_components,
            max_iter=max_iter, batch_size=1000, init_std=0.01,
            dtype=np.float32, n_jobs=n_jobs, random_state=98765,
            save_params=True, save_dir=self.save_dir, early_stopping=True,
            verbose=True, lambda_alpha=lam_alpha, lambda_theta=lam_theta,
            lambda_beta=lam_beta, lambda_gamma_p=lam_gamma_p,
            lambda_gamma_n=lam_gamma_n, c0=c0, c1=c1)
        # lambda_gamma = 1e-1
        coder.fit(
            self.train_data, self.X_sppmi, self.X_neg_sppmi,
            vad_data=self.vad_data, batch_users=300, k=vad_K,
            clear_invalid=False, n_jobs=4
        )  # for active_cate selection, should use clear_invalid = False
        self.test_data.data = np.ones_like(self.test_data.data)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Model3_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'model4':
        print 'positive user embedding + negative user embedding'
        mu_u_p = float(kwargs.get('mu_u_p', 0.4))
        mu_u_n = float(kwargs.get('mu_u_n', -1.0))
        if mu_u_n == -1.0:
            mu_u_n = 1.0 - mu_u_p
        print 'mu_u_p = %.2f , mu_u_n = %.2f' % (mu_u_p, mu_u_n)
        print self.save_dir
        self.clean_savedir()
        coder = mymodel4.ParallelMFPositiveNegativeUserEmbedding(
            mu_u_p=mu_u_p, mu_u_n=mu_u_n, n_components=n_components,
            max_iter=max_iter, batch_size=1000, init_std=0.01,
            dtype=np.float32, n_jobs=n_jobs, random_state=98765,
            save_params=True, save_dir=self.save_dir, early_stopping=True,
            verbose=True, lambda_alpha=lam_alpha,
            lambda_theta_p=lam_theta_p, lambda_theta_n=lam_theta_n,
            lambda_beta=lam_beta, c0=c0, c1=c1)
        coder.fit(self.train_data, self.Y_sppmi, self.Y_neg_sppmi,
                  vad_data=self.vad_data, batch_users=300, k=vad_K,
                  clear_invalid=False, n_jobs=4)
        self.test_data.data = np.ones_like(self.test_data.data)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Model4_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'mcf':
        print 'positive and negative project embedding + positive user embedding'
        mu_p_p = float(kwargs.get('mu_p_p', 0.4))
        mu_p_n = float(kwargs.get('mu_p_n', 0.4))
        mu_u_p = float(kwargs.get('mu_u_p', -1.0))
        if mu_u_p == -1.0:
            mu_u_p = 1.0 - mu_p_p - mu_p_n
        print 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
            mu_u_p, mu_p_p, mu_p_n)
        print self.save_dir
        self.clean_savedir()
        coder = mymodel5.ParallelMFPosUserPosNegProjectEmbedding(
            mu_u_p=mu_u_p, mu_p_p=mu_p_p, mu_p_n=mu_p_n,
            n_components=n_components, max_iter=max_iter, batch_size=1000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta_p=lam_theta_p, lambda_beta=lam_beta,
            lambda_gamma_p=lam_gamma_p, lambda_gamma_n=lam_gamma_n,
            c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                  self.Y_sppmi, vad_data=self.vad_data, batch_users=300,
                  k=vad_K, clear_invalid=False, n_jobs=4)
        self.test_data.data = np.ones_like(self.test_data.data)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Model5_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'model6':
        print 'positive project embedding + positive + negative user embedding'
        mu_u_p = float(kwargs.get('mu_u_p', 0.4))
        mu_u_n = float(kwargs.get('mu_u_n', 0.4))
        mu_p_p = float(kwargs.get('mu_p_p', -1.0))
        if mu_p_p == -1.0:
            mu_p_p = 1.0 - mu_u_p - mu_u_n
        print 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f' % (
            mu_u_p, mu_u_n, mu_p_p)
        print self.save_dir
        self.clean_savedir()
        coder = mymodel6.ParallelMFPosNegUserPosProjectEmbedding(
            mu_u_p=mu_u_p, mu_u_n=mu_u_n, mu_p_p=mu_p_p,
            n_components=n_components, max_iter=max_iter, batch_size=1000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta_p=lam_theta_p, lambda_theta_n=lam_theta_n,
            lambda_beta=lam_beta, lambda_gamma_p=lam_gamma_p,
            c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, self.Y_sppmi,
                  self.Y_neg_sppmi, vad_data=self.vad_data,
                  batch_users=300, k=vad_K, clear_invalid=False, n_jobs=4)
        self.test_data.data = np.ones_like(self.test_data.data)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Model6_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'model7':
        print 'positive + negative project embedding + positive + negative user embedding'
        mu_u_p = float(kwargs.get('mu_u_p', 0.3))
        mu_u_n = float(kwargs.get('mu_u_n', 0.3))
        mu_p_p = float(kwargs.get('mu_p_p', 0.3))
        mu_p_n = float(kwargs.get('mu_p_n', -1.0))
        if mu_p_n == -1.0:
            mu_p_n = 1.0 - mu_u_p - mu_u_n - mu_p_p
        print 'mu_u_p = %.1f, mu_u_n = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
            mu_u_p, mu_u_n, mu_p_p, mu_p_n)
        print self.save_dir
        self.clean_savedir()
        coder = mymodel7.ParallelMFPositiveNegativeUserProjectEmbedding(
            mu_u_p=mu_u_p, mu_u_n=mu_u_n, mu_p_p=mu_p_p, mu_p_n=mu_p_n,
            n_components=n_components, max_iter=max_iter, batch_size=1000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta_p=lam_theta_p, lambda_theta_n=lam_theta_n,
            lambda_beta=lam_beta, lambda_gamma_p=lam_gamma_p,
            lambda_gamma_n=lam_gamma_n, c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                  self.Y_sppmi, self.Y_neg_sppmi, vad_data=self.vad_data,
                  batch_users=300, k=vad_K, clear_invalid=False, n_jobs=4)
        self.test_data.data = np.ones_like(self.test_data.data)
        n_params = len(glob.glob(os.path.join(self.save_dir, '*.npz')))
        params = np.load(
            os.path.join(
                self.save_dir,
                'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Model7_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'separatedmodel2':
        print 'positive project embedding + positive user embedding'
        mu_p = float(kwargs.get('mu_p_p', 0.6))
        mu_u = float(kwargs.get('mu_u_p', -1.0))
        if mu_u == -1.0:
            mu_u = 1.0 - mu_p
        print 'mu_u = %.2f , mu_p = %.2f' % (mu_u, mu_p)
        print self.save_dir
        self.clean_savedir()
        # coder = mymodel2.MFPositiveUserProjectEmbedding(mu_u = mu_u, mu_p = mu_p,
        #     n_components=n_components, max_iter=max_iter, batch_size=1000, init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
        #     random_state=98765, save_params=True, save_dir=self.save_dir, early_stopping=True, verbose=True,
        #     lambda_alpha = lam_alpha, lambda_theta=lam_theta, lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
        coder = separatedmymodel2.SeparatedParallelMFPosUserPosProjectEmbedding(
            mu_u=mu_u, mu_p=mu_p, n_components=n_components,
            max_iter=max_iter, batch_size=1000, init_std=0.01,
            dtype=np.float32, n_jobs=n_jobs, random_state=98765,
            save_params=True, save_dir=self.save_dir, early_stopping=True,
            verbose=True, lambda_alpha=lam_alpha, lambda_theta=lam_theta,
            lambda_beta=lam_beta, lambda_gamma=lam_gamma, c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, self.Y_sppmi,
                  vad_data=self.vad_data, batch_users=300, k=vad_K,
                  clear_invalid=True, n_jobs=16)
        self.test_data.data = np.ones_like(self.test_data.data)
        # Separated models save a single fixed-name checkpoint.
        params = np.load(
            os.path.join(self.save_dir,
                         'CoFacto_K%d_separated.npz' % (n_components)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Separated_Model2_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'separatedmodel3':
        print 'separated positive project embedding + negative project embedding'
        mu_p_p = float(kwargs.get('mu_p_p', 0.4))
        mu_p_n = float(kwargs.get('mu_p_n', -1.0))
        if mu_p_n == -1.0:
            mu_p_n = 1.0 - mu_p_p
        print 'mu_p_p = %.2f , mu_p_n = %.2f' % (mu_p_p, mu_p_n)
        print self.save_dir
        self.clean_savedir()
        coder = separatedmymodel3.SeparatedParallelMFPositiveNegativeProjectEmbedding(
            mu_p_p=mu_p_p, mu_p_n=mu_p_n, n_components=n_components,
            max_iter=max_iter, batch_size=1000, init_std=0.01,
            dtype=np.float32, n_jobs=n_jobs, random_state=98765,
            save_params=True, save_dir=self.save_dir, early_stopping=True,
            verbose=True, lambda_alpha=lam_alpha, lambda_theta=lam_theta,
            lambda_beta=lam_beta, lambda_gamma_p=lam_gamma_p,
            lambda_gamma_n=lam_gamma_n, c0=c0, c1=c1)
        # lambda_gamma = 1e-1
        coder.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                  vad_data=self.vad_data, batch_users=300, k=vad_K,
                  clear_invalid=True, n_jobs=16)
        self.test_data.data = np.ones_like(self.test_data.data)
        params = np.load(
            os.path.join(self.save_dir,
                         'CoFacto_K%d_separated.npz' % (n_components)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Separated_Model3_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)
    if type == 'separatedmcf':
        print 'positive and negative project embedding + positive user embedding'
        mu_p_p = float(kwargs.get('mu_p_p', 0.4))
        mu_p_n = float(kwargs.get('mu_p_n', 0.4))
        mu_u_p = float(kwargs.get('mu_u_p', -1.0))
        if mu_u_p == -1.0:
            mu_u_p = 1.0 - mu_p_p - mu_p_n
        print 'mu_u_p = %.1f, mu_p_p = %.1f, mu_p_n = %.1f' % (
            mu_u_p, mu_p_p, mu_p_n)
        print self.save_dir
        self.clean_savedir()
        coder = separatedmymodel5.SeparatedParallelMFPosUserPosNegProjectEmbedding(
            mu_u_p=mu_u_p, mu_p_p=mu_p_p, mu_p_n=mu_p_n,
            n_components=n_components, max_iter=max_iter, batch_size=1000,
            init_std=0.01, dtype=np.float32, n_jobs=n_jobs,
            random_state=98765, save_params=True, save_dir=self.save_dir,
            early_stopping=True, verbose=True, lambda_alpha=lam_alpha,
            lambda_theta_p=lam_theta_p, lambda_beta=lam_beta,
            lambda_gamma_p=lam_gamma_p, lambda_gamma_n=lam_gamma_n,
            c0=c0, c1=c1)
        coder.fit(self.train_data, self.X_sppmi, self.X_neg_sppmi,
                  self.Y_sppmi, vad_data=self.vad_data, batch_users=300,
                  k=vad_K, clear_invalid=True, n_jobs=16)
        self.test_data.data = np.ones_like(self.test_data.data)
        params = np.load(
            os.path.join(self.save_dir,
                         'CoFacto_K%d_separated.npz' % (n_components)))
        U, V = params['U'], params['V']
        (recall100, ndcg100, map100) = self.local_alone_eval(U, V)
        model_out_name = 'Separated_Model5_K100_%srecall100_%.4f_ndcg100_%.4f_map100_%.4f.npz' % (
            fold, recall100, ndcg100, map100)
        np.savez(model_out_name, U=U, V=V)