def nmf_run():
    """
    Run the non-negative matrix factorization (NMF) recommendation demo.

    Loads the user-item matrix from ``data.txt``, factorizes it with
    ``train``, saves both factors, runs ``prediction`` and prints the
    result of ``top_k`` followed by the product of the two factors.

    :return: None
    """
    # 1. Load the user-item matrix.
    print("----------- 1、load data -----------")
    loader = FTool.LoadData(file_name="data.txt")
    ratings = loader.load_data_with_none()

    # 2. Factorize the matrix.
    # NOTE(review): the positional arguments (5, 10000, 1e-5) are presumably
    # rank, max iterations and error tolerance -- confirm against ``train``.
    print("----------- 2、training -----------")
    w_factor, h_factor = train(ratings, 5, 10000, 1e-5)

    # 3. Persist both factors, W first and then H.
    print("----------- 3、save decompose -----------")
    for model_name, factor in (("w_matrix", w_factor), ("h_matrix", h_factor)):
        with FTool.SaveModel(file_name=model_name) as writer:
            writer.save_model_mul(factor)

    # 4. Predict.
    # NOTE(review): the trailing 0 is presumably the target user index.
    print("----------- 4、prediction -----------")
    scores = prediction(ratings, w_factor, h_factor, 0)

    # 5. Top-K recommendation (K = 2).
    print("----------- 5、top_k recommendation ------------")
    print(top_k(scores, 2))

    # Finally show the product of the two factors.
    print(w_factor * h_factor)
def recommend_run():
    """
    Run the collaborative filtering demo (user-based, then item-based).

    The user-based pass works on the matrix as loaded from ``data.txt``;
    the item-based pass works on its transpose. Each pass computes a
    similarity matrix, produces predictions and prints the ``top_k``
    result.

    :return: None
    """
    print(
        "=======================User-based Recommend========================")

    # 1. Load the user-item data.
    print("------------ 1. load data ------------")
    data = FTool.LoadData(file_name="data.txt").load_data_with_none()

    # 2. Compute the similarity between users.
    print("------------ 2. calculate similarity between users -------------")
    w = similarity(data)

    # 3. Recommend using the user similarities.
    # NOTE(review): the trailing 2 is presumably the target user index.
    print("------------ 3. predict ------------")
    predict = user_based_recommend(data, w, 2)

    # 4. Top-K recommendation (K = 2).
    print("------------ 4. top_k recommendation ------------")
    top_recommend = top_k(predict, 2)
    print(top_recommend)

    # Fix: this banner used to repeat "User-based Recommend" although the
    # section below runs the item-based recommender.
    print(
        "\n\n=======================Item-based Recommend========================"
    )

    # Transpose the user-item matrix into an item-user matrix.
    data = data.T
    w = similarity(data)
    predict = item_based_recommend(data, w, 2)

    # 4. Top-K recommendation (K = 2).
    print("------------ 4. top_k recommendation ------------")
    top_recommend = top_k(predict, 2)
    print(top_recommend)
def mf_run():
    """
    Run the matrix-factorization-based recommendation demo.

    Loads the user-item matrix from ``data.txt``, factorizes it with
    ``grad_ascent``, saves both factors, runs ``prediction`` and prints
    the result of ``top_k`` followed by the product of the two factors.

    :return: None
    """
    # 1. Load the user-item matrix.
    print("----------- 1、load data -----------")
    source = FTool.LoadData(file_name="data.txt")
    ratings = source.load_data_with_none()

    # 2. Factorize the matrix.
    # Original author's note: with so few samples, raising the iteration
    # count does not make the minimum loss converge any further.
    # NOTE(review): meaning of (3, 0.0002, 0.02, 5000) is not visible here --
    # confirm against ``grad_ascent``.
    print("----------- 2、training -----------")
    factors = grad_ascent(ratings, 3, 0.0002, 0.02, 5000)
    p_factor, q_factor = factors

    # 3. Persist the decomposition.
    print("----------- 3、save decompose -----------")
    with FTool.SaveModel(file_name="p_result") as p_writer:
        p_writer.save_model_mul(p_factor)
    with FTool.SaveModel(file_name="q_result") as q_writer:
        q_writer.save_model_mul(q_factor)

    # 4. Predict.
    # NOTE(review): the trailing 0 is presumably the target user index.
    print("----------- 4、prediction -----------")
    scores = prediction(ratings, p_factor, q_factor, 0)

    # 5. Top-K recommendation (K = 2).
    print("----------- 5、top_k recommendation ------------")
    best = top_k(scores, 2)
    print(best)

    # Finally show the product of the two factors.
    print(p_factor * q_factor)
def k_means_pp():
    """
    Run the K-Means++ clustering demo on ``data.txt``.

    Loads the samples, plots the first two columns, picks the initial
    centers with ``get_centroids``, clusters with ``k_means_function``
    and saves both the assignment result and the centroids.

    :return: None
    """
    cluster_count = 4  # number of cluster centers
    source_file = "data.txt"

    # 1. Load the data and plot the first two columns of every sample.
    print("---------- 1.load data ------------")
    reader = FTool.LoadData(file_name=source_file)
    samples, _, _ = reader.load_data(
        feature_end=0, need_label_length=True, need_list=True)
    xs = [row[0] for row in samples]
    ys = [row[1] for row in samples]
    with FTool.PaintingWithList(name="K-Means") as canvas:
        canvas.painting_simple_list(xs, ys)

    # 2. K-Means++ initialisation of the cluster centers.
    print("---------- 2.K-Means++ generate centers ------------")
    sample_matrix = np.mat(samples)
    centers = get_centroids(sample_matrix, cluster_count)

    # 3. Clustering.
    # NOTE(review): whether k_means_function updates ``centers`` in place is
    # not visible here; the same object is saved in step 5.
    print("---------- 3.k-Means ------------")
    assignment = k_means_function(sample_matrix, cluster_count, centers)

    # 4. Save the per-sample cluster assignment.
    print("---------- 4.save subCenter ------------")
    with FTool.SaveModel(file_name="sub_pp") as writer:
        writer.save_model_mul(assignment)

    # 5. Save the cluster centers.
    print("---------- 5.save centroids ------------")
    with FTool.SaveModel(file_name="center_pp") as writer:
        writer.save_model_mul(centers)
def k_means(k_class: int):
    """
    Run the K-Means clustering demo on ``data.txt``.

    Loads the samples, plots the first two columns, picks the initial
    centers with ``rand_cent``, clusters with ``k_means_function`` and
    saves both the assignment result and the centroids.

    :param k_class: number of cluster centers
    :return: None
    """
    source_file = "data.txt"

    # 1. Load the data and plot the first two columns of every sample.
    print("---------- 1.load data ------------")
    reader = FTool.LoadData(file_name=source_file)
    points, _, _ = reader.load_data(
        feature_end=0, need_label_length=True, need_list=True)
    horizontal = [point[0] for point in points]
    vertical = [point[1] for point in points]
    with FTool.PaintingWithList(name="K-Means") as canvas:
        canvas.painting_simple_list(horizontal, vertical)

    # 2. Initialise the k cluster centers with ``rand_cent``.
    print("---------- 2.random center ------------")
    point_matrix = np.mat(points)
    centers = rand_cent(point_matrix, k_class)

    # 3. Clustering.
    # NOTE(review): whether k_means_function updates ``centers`` in place is
    # not visible here; the same object is saved in step 5.
    print("---------- 3.kmeans ------------")
    membership = k_means_function(point_matrix, k_class, centers)

    # 4. Save the per-sample cluster assignment.
    print("---------- 4.save subCenter ------------")
    with FTool.SaveModel(file_name="sub") as writer:
        writer.save_model_mul(membership)

    # 5. Save the cluster centers.
    print("---------- 5.save centroids ------------")
    with FTool.SaveModel(file_name="center") as writer:
        writer.save_model_mul(centers)
def personal_rank_run():
    """
    Run the PersonalRank recommendation demo for node ``"U_0"``.

    Loads the user-item matrix from ``data.txt``, converts it with
    ``generate_dict``, ranks with ``personal_rank`` and prints the output
    of ``recommend``.

    :return: None
    """
    start_node = "U_0"

    # 1. Load the user-item matrix.
    print("------------ 1.load data -------------")
    loader = FTool.LoadData(file_name="data.txt")
    ratings = loader.load_data_with_none()

    # 2. Convert the matrix into an adjacency-list (bipartite graph) form.
    print("------------ 2.generate dict --------------")
    graph = generate_dict(ratings)

    # 3. Compute PersonalRank.
    # NOTE(review): 0.85 and 500 are presumably the damping factor and the
    # iteration limit -- confirm against ``personal_rank``.
    print("------------ 3.PersonalRank --------------")
    scores = personal_rank(graph, 0.85, start_node, 500)

    # 4. Recommend items based on the rank result.
    print("------------ 4.recommend -------------")
    print(recommend(graph, scores, start_node))
def TestOfLR():
    """
    Train and then test the logistic regression model.

    Runs ``lr_train.TrainOfLR``, loads the saved ``weights`` model, loads
    the test data, computes predictions with ``predict`` and stores them
    through ``save_result``.

    :return: None
    """
    # Train first so that the "weights" model is available to load.
    lr_train.TrainOfLR()

    print("------------------------1. Load Model-----------------------")
    with FTool.LoadModel("weights") as model_reader:
        weights = model_reader.load_model_mul()
    # Size of the second axis of the weights; passed on as ``number`` below.
    column_count = np.shape(weights)[1]

    print("------------------------2. Load Data-----------------------")
    data_reader = FTool.LoadData(file_name="test_data", feature_type="float")
    samples = data_reader.load_data_with_limit(number=column_count, offset=1)

    print("------------------------3. Get Prediction-----------------------")
    outputs = predict(samples, weights)
    # TODO painting

    print("------------------------4. Save Prediction-----------------------")
    save_result("result", outputs)
def cart_train():
    """
    Train a CART regression tree on ``sine.txt``.

    Loads and plots the training data, builds the tree with
    ``build_tree``, prints the error reported by ``cal_error`` and saves
    the tree through ``save_model``.

    :return: None
    """
    # 1. Load the training data and plot the first column against the label.
    print("----------- 1、load data -------------")
    reader = FTool.LoadData(file_name="sine.txt")
    samples, targets, _ = reader.load_data(
        need_label_length=True, need_list=True, feature_end=0)
    first_column = [sample[0] for sample in samples]
    with FTool.PaintingWithList(name="cart_train Train") as canvas:
        canvas.painting_simple_list(first_column, targets)

    # 2. Build the CART tree.
    # NOTE(review): 30 and 0.3 are presumably the minimum sample count and
    # minimum error for a split -- confirm against ``build_tree``.
    print("----------- 2、build cart_train ------------")
    tree = build_tree(samples, 30, 0.3)

    # 3. Evaluate the tree on the training data.
    print("----------- 3、cal err -------------")
    training_error = cal_error(samples, tree)
    print("\t--------- err : ", training_error)

    # 4. Save the final CART model.
    print("----------- 4、save result -----------")
    save_model(tree, "regression_tree")