def svdpp_algorithm() -> SVDpp:
    """Interactively build an SVD++ model.

    Asks on stdin whether the default hyper-parameters should be used. Any
    answer other than ``y``/``Y`` (surrounding whitespace ignored) leads to
    prompts for the number of factors, the number of epochs and the global
    learning rate.

    Returns:
        An untrained ``SVDpp`` instance.

    Raises:
        ValueError: if a numeric prompt is answered with text that cannot be
            parsed by ``int`` / ``float``.
    """
    user_input = input(
        'Do you want to continue with the default parameters? Y/N')
    if user_input.strip().lower() == 'y':
        return SVDpp()

    n_factors = int(input('Enter total number of factors: '))
    n_epochs = int(input('Enter number of epochs: '))
    lr_all = float(
        input('Enter the learning rate for all the paramaters: '))
    # Pass by keyword: lr_all is not the third positional parameter of
    # SVDpp, so a positional call would silently set a different
    # hyper-parameter instead of the learning rate.
    return SVDpp(n_factors=n_factors, n_epochs=n_epochs, lr_all=lr_all)
def train(self, df, model_path=''):
    '''
    Train a latent factor model (SVD++) on the full rating data.

    :param df: frame holding the three columns userid, itemid, rating
    :param model_path: where to persist the model; empty (the default)
        means the model is not persisted
    :return: the fitted model
    '''
    # Convert the frame into the structure surprise expects and use all of
    # it for training.
    full_trainset = Dataset.load_from_df(df, self.reader).build_full_trainset()

    model = SVDpp()
    model.fit(full_trainset)

    if not model_path:
        return model

    surprise.dump.dump(model_path, algo=model, verbose=1)
    return model
def slot_select_algo_combobox(self):
    """React to a new selection in the algorithm combo box.

    Marks the algorithm as changed and not yet trained, then, when the
    selected text names a known algorithm, stores a fresh instance of it in
    ``self.algo`` and appends the matching status message to
    ``self.display_process_label``. Unknown selections only update the flags.
    """
    self.algo_change_flag = True
    self.algo_trained_flag = False

    selected = self.select_algo_comboBox.currentText()

    # Combo-box text -> (algorithm class, status message shown to the user).
    choices = {
        'SVD': (SVD, '加载SVD模型...'),
        'SVD++': (SVDpp, '加载SVD++模型...'),
        'NMF': (NMF, '加载NMF模型...'),
        'Slope One': (SlopeOne, '加载Slope One模型...'),
        'k-NN': (KNNBasic, '加载k-NN模型...'),
        'Centered k-NN': (KNNWithMeans, '加载Centered k-NN模型...'),
        'k-NN Baseline': (KNNBaseline, '加载k-NN Baseline模型...'),
        'Co-Clustering': (CoClustering, '加载Co-Clustering模型...'),
        'Baseline': (BaselineOnly, '加载Baseline模型...'),
        'Random': (NormalPredictor, '加载Random模型...'),
    }

    entry = choices.get(selected)
    if entry is None:
        return

    algo_cls, status_text = entry
    self.algo = algo_cls()
    self.display_process_label.append(status_text)
def run_svd(data, params, svdpp=False):
    """Fit and return a matrix-factorization model.

    Builds an ``SVDpp`` when ``svdpp`` is true, otherwise an ``SVD`` (which
    additionally reads the ``'biased'`` parameter). Hyper-parameters are
    looked up in ``params`` through ``utils.get_param``; the model is fitted
    on ``data`` with verbose output enabled.
    """
    def _shared_kwargs():
        # Settings common to both model flavours.
        return dict(
            n_factors=utils.get_param(params, 'n_factors'),
            n_epochs=utils.get_param(params, 'n_epochs'),
            lr_all=utils.get_param(params, 'learning_rate'),
            reg_all=utils.get_param(params, 'reg'),
            verbose=True,
        )

    if svdpp:
        model = SVDpp(**_shared_kwargs())
    else:
        use_bias = utils.get_param(params, 'biased')
        model = SVD(biased=use_bias, **_shared_kwargs())

    model.fit(data)
    return model
def grid_search(surprise_model):
    """Build a ``GridSearchCV`` configured for the given algorithm.

    Args:
        surprise_model: callable producing the algorithm (normally the class
            itself). Supported algorithm types are ``SVDpp``, ``SVD``,
            ``NMF`` and ``BaselineOnly``; the match is on the exact type.

    Returns:
        An unfitted ``GridSearchCV`` using RMSE and MAE, 3-fold
        cross-validation, all available cores, and ``refit=True``.

    Raises:
        ValueError: if the algorithm type has no predefined parameter grid
            (previously this surfaced as an ``UnboundLocalError``).
    """
    param_grids = {
        SVDpp: {'n_factors': [20],
                'n_epochs': [20],
                'lr_all': [0.005, 0.007, 0.05, 0.07, 0.5, 0.7, 1.0],
                'reg_all': [0.02, 0.05, 0.2, 0.5]},
        SVD: {'n_epochs': [20],
              'lr_all': [0.005, 0.007, 0.05, 0.07, 0.5, 0.7, 1.0],
              'reg_all': [0.02, 0.05, 0.2, 0.5]},
        NMF: {'n_epochs': [20],
              'reg_pu': [0.02, 0.04, 0.06, 0.08, 0.2],
              'reg_qi': [0.02, 0.04, 0.06, 0.08, 0.2]},
        BaselineOnly: {'bsl_options': {'method': ['als', 'sgd'],
                                       'reg': [1, 2],
                                       'learning_rate': [0.005, 0.05, 0.5, 1.0]}},
    }

    # Instantiate the candidate once (not once per comparison) and look the
    # grid up by its exact type, as the original type(...) == type(...)
    # checks did.
    model_type = type(surprise_model())
    param_grid = param_grids.get(model_type)
    if param_grid is None:
        raise ValueError(
            f'No parameter grid defined for {model_type.__name__}')

    return GridSearchCV(surprise_model, param_grid,
                        measures=['rmse', 'mae'], cv=3, n_jobs=-1,
                        joblib_verbose=1, refit=True)
RS_data = Dataset.load_from_df(RS_ratings, RS_reader)

# Benchmark_Algorithm_Metric: run the same 5-fold cross-validation for every
# candidate algorithm and collect one summary row per algorithm.
benchmark = []
for algorithm in [
        BaselineOnly(),
        CoClustering(),
        KNNBaseline(),
        KNNBasic(),
        KNNWithMeans(),
        KNNWithZScore(),
        NMF(),
        NormalPredictor(),
        SlopeOne(),
        SVD(),
        SVDpp()
]:
    # Perform cross validation
    results = cross_validate(algorithm,
                             RS_data,
                             measures=['rmse', 'mae', 'mse', 'fcp'],
                             cv=5,
                             verbose=True)

    # Results To Serie List: average every result column over the folds.
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    # Series.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent. The class name labels the row.
    tmp = pd.concat(
        [tmp, pd.Series([type(algorithm).__name__], index=['Algorithm'])])
    benchmark.append(tmp)

# Results to Dataframe and .csv
import time

from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms.matrix_factorization import SVDpp

# Time a full cross-validated SVD++ run on MovieLens-1M, including the
# dataset load.
start_time = time.time()

data = Dataset.load_builtin('ml-1m')

# NOTE(review): these three values are defined but never passed to SVDpp
# below -- confirm whether they were meant as n_epochs / reg_all / init_mean.
e = 15
reg = .03
init_mean = .1

algo = SVDpp(verbose=1)

# `surprise.evaluate` was removed in Surprise 1.1.0. cross_validate is its
# replacement; 5 folds with RMSE and MAE mirrors evaluate()'s defaults.
cross_validate(algo, data, measures=['rmse', 'mae'], cv=5, verbose=True)

running_time = time.time() - start_time
print("SVD:", running_time, " s")
print(f'HW5 Implementation Out-of-Sample Error: {e_out:.3}')

# Off-the-shelf SVD baseline via the scipy_svd_train helper. sqrt(Sigma) is
# folded into both factors so that U times V carries the singular values.
U, Sigma, V = off_the_shelf.scipy_svd_train(M, N, K, Y_train)
sqrt_sigma = np.diag(np.sqrt(Sigma))
U = np.matmul(U, sqrt_sigma)
V = np.matmul(sqrt_sigma, V)
V_t = V.transpose()
e_in = svd_sgd.get_err(U, V_t, Y_train)
e_out = svd_sgd.get_err(U, V_t, Y_test)
print(f'SciPy SVD In-Sample Error: {e_in:.3}')
print(f'SciPy SVD Out-of-Sample Error: {e_out:.3}')

# Surprise models, as (display label, untrained model) pairs.
svd_models = [
    ('SVD Unbiased', SVD(n_factors=20, biased=False, n_epochs=100)),
    ('SVD w/ Global and Term Bias', SVD(n_factors=20, n_epochs=100)),
    ('SVD++', SVDpp(n_factors=20)),
]


def get_surprise_err(model, d):
    """Return half the mean squared error of ``model`` over ``d``.

    ``d`` is iterated as (user, item, rating) triples; each rating is
    converted with ``int`` before being compared to the model's estimate.
    """
    total = 0.0
    for user, item, true_rating in d:
        predicted = model.predict(user, item).est
        residual = int(true_rating) - predicted
        total += 0.5 * residual ** 2
    return total / len(d)


reader = Reader(line_format='user item rating', sep='\t')
data = Dataset.load_from_folds(
    [('../data/train.txt', '../data/test.txt')], reader)
all_folds = list(data.folds())
train, test = all_folds[0]