コード例 #1
0
def nmf_run():
    """
    Run the non-negative matrix factorization (NMF) recommendation pipeline.

    Loads the user-item matrix from "data.txt", factorizes it with ``train``,
    saves both factor matrices, then prints a top-2 recommendation and the
    product of the two factors.

    :return: None
    """
    # Step 1: load the user-item matrix.
    print("----------- 1、load data -----------")
    loader = FTool.LoadData(file_name="data.txt")
    ratings = loader.load_data_with_none()

    # Step 2: factorize the matrix.
    # NOTE(review): the positional arguments (5, 10000, 1e-5) are presumably
    # rank, max iterations and error tolerance -- confirm against ``train``.
    print("----------- 2、training -----------")
    w_matrix, h_matrix = train(ratings, 5, 10000, 1e-5)

    # Step 3: persist each factor under its own model name.
    print("----------- 3、save decompose -----------")
    for model_name, factor in (("w_matrix", w_matrix), ("h_matrix", h_matrix)):
        with FTool.SaveModel(file_name=model_name) as writer:
            writer.save_model_mul(factor)

    # Step 4: predict (the trailing 0 is presumably the target user index).
    print("----------- 4、prediction -----------")
    scores = prediction(ratings, w_matrix, h_matrix, 0)

    # Step 5: top-K recommendation with K = 2.
    print("----------- 5、top_k recommendation ------------")
    print(top_k(scores, 2))
    print(w_matrix * h_matrix)
コード例 #2
0
def recommend_run():
    """
    Run collaborative filtering in both user-based and item-based form.

    The user-item matrix is loaded from "data.txt". The user-based pass
    computes similarities on the matrix as loaded; the item-based pass
    transposes the matrix first and recomputes similarities. Each pass
    prints its top-2 recommendation.

    :return: None
    """
    # ---- User-based pass ----
    print(
        "=======================User-based Recommend========================")
    # 1. Load the user-item data.
    print("------------ 1. load data ------------")
    data = FTool.LoadData(file_name="data.txt").load_data_with_none()
    # 2. Compute the similarity between users.
    print("------------ 2. calculate similarity between users -------------")
    w = similarity(data)
    # 3. Recommend using the user similarities.
    print("------------ 3. predict ------------")
    predict = user_based_recommend(data, w, 2)
    # 4. Top-K recommendation.
    print("------------ 4. top_k recommendation ------------")
    top_recommend = top_k(predict, 2)
    print(top_recommend)

    # ---- Item-based pass ----
    # Fix: this banner used to read "User-based Recommend" even though the
    # section below runs the item-based recommender.
    print(
        "\n\n=======================Item-based Recommend========================"
    )
    data = data.T  # turn the user-item matrix into an item-user matrix
    w = similarity(data)
    predict = item_based_recommend(data, w, 2)
    # 4. Top-K recommendation.
    print("------------ 4. top_k recommendation ------------")
    top_recommend = top_k(predict, 2)
    print(top_recommend)
コード例 #3
0
ファイル: mf.py プロジェクト: shanwenhao1/Machine-Learning
def mf_run():
    """
    Run the matrix-factorization-based recommendation pipeline.

    Loads the user-item matrix from "data.txt", factorizes it with
    ``grad_ascent``, saves both factors, then prints a top-2 recommendation
    and the product of the two factors.

    :return: None
    """
    # Step 1: load the user-item matrix.
    print("----------- 1、load data -----------")
    loader = FTool.LoadData(file_name="data.txt")
    ratings = loader.load_data_with_none()

    # Step 2: factorize the matrix by gradient descent.
    # With so few samples, raising the iteration count beyond this does not
    # make the minimum loss converge any further.
    print("----------- 2、training -----------")
    p, q = grad_ascent(ratings, 3, 0.0002, 0.02, 5000)

    # Step 3: persist each factor under its own model name.
    print("----------- 3、save decompose -----------")
    for model_name, factor in (("p_result", p), ("q_result", q)):
        with FTool.SaveModel(file_name=model_name) as writer:
            writer.save_model_mul(factor)

    # Step 4: predict (the trailing 0 is presumably the target user index).
    print("----------- 4、prediction -----------")
    scores = prediction(ratings, p, q, 0)

    # Step 5: top-K recommendation with K = 2.
    print("----------- 5、top_k recommendation ------------")
    print(top_k(scores, 2))
    print(p * q)
コード例 #4
0
def k_means_pp(k_class: int = 4):
    """
    Run K-Means++ clustering on the samples in "data.txt".

    The samples are plotted, initial centers are chosen by ``get_centroids``,
    clustering is done by ``k_means_function``, and the assignments and
    centers are saved as the "sub_pp" and "center_pp" models.

    :param k_class: number of cluster centers; defaults to 4, the value that
        was previously hard-coded, so existing no-argument calls are unchanged
    :return: None
    """
    k = k_class  # number of cluster centers
    file_path = "data.txt"
    # 1. Load the data.
    print("---------- 1.load data ------------")
    data, _, _ = FTool.LoadData(file_name=file_path).load_data(
        feature_end=0, need_label_length=True, need_list=True)
    # The first two columns of each sample are used as plot coordinates.
    x_data = [_data[0] for _data in data]
    y_data = [_data[1] for _data in data]
    with FTool.PaintingWithList(name="K-Means") as paint:
        paint.painting_simple_list(x_data, y_data)
    # 2. Initialize the cluster centers the K-Means++ way.
    print("---------- 2.K-Means++ generate centers ------------")
    data = np.mat(data)
    centroids = get_centroids(data, k)
    # 3. Run the clustering.
    print("---------- 3.k-Means ------------")
    sub_center = k_means_function(data, k, centroids)
    # 4. Save the cluster assignment of each sample.
    print("---------- 4.save subCenter ------------")
    with FTool.SaveModel(file_name="sub_pp") as save_model:
        save_model.save_model_mul(sub_center)
    # 5. Save the cluster centers.
    print("---------- 5.save centroids ------------")
    with FTool.SaveModel(file_name="center_pp") as save_model:
        save_model.save_model_mul(centroids)
コード例 #5
0
def k_means(k_class: int):
    """
    Run plain K-Means clustering on the samples in "data.txt".

    The samples are plotted, initial centers are chosen by ``rand_cent``,
    clustering is done by ``k_means_function``, and the assignments and
    centers are saved as the "sub" and "center" models.

    :param k_class: number of cluster centers
    :return: None
    """
    # Step 1: load the samples as a plain list.
    print("---------- 1.load data ------------")
    loader = FTool.LoadData(file_name="data.txt")
    samples, _, _ = loader.load_data(
        feature_end=0, need_label_length=True, need_list=True)

    # Plot the first two columns of every sample.
    xs = [row[0] for row in samples]
    ys = [row[1] for row in samples]
    with FTool.PaintingWithList(name="K-Means") as canvas:
        canvas.painting_simple_list(xs, ys)

    # Step 2: pick k random initial cluster centers.
    print("---------- 2.random center ------------")
    sample_mat = np.mat(samples)
    centroids = rand_cent(sample_mat, k_class)

    # Step 3: run the clustering.
    print("---------- 3.kmeans ------------")
    sub_center = k_means_function(sample_mat, k_class, centroids)

    # Step 4: save the cluster assignment of each sample.
    print("---------- 4.save subCenter ------------")
    with FTool.SaveModel(file_name="sub") as writer:
        writer.save_model_mul(sub_center)

    # Step 5: save the cluster centers.
    print("---------- 5.save centroids ------------")
    with FTool.SaveModel(file_name="center") as writer:
        writer.save_model_mul(centroids)
コード例 #6
0
def personal_rank_run():
    """
    Run the PersonalRank recommendation pipeline for node "U_0".

    Loads the user-item matrix from "data.txt", converts it with
    ``generate_dict``, ranks with ``personal_rank`` and prints the output of
    ``recommend``.

    :return: None
    """
    root = "U_0"  # node the ranking and the recommendation are computed for

    # Step 1: load the user-item matrix.
    print("------------ 1.load data -------------")
    loader = FTool.LoadData(file_name="data.txt")
    ratings = loader.load_data_with_none()

    # Step 2: convert the matrix to adjacency-list storage (bipartite graph).
    print("------------ 2.generate dict --------------")
    graph = generate_dict(ratings)

    # Step 3: run PersonalRank.
    # NOTE(review): 0.85 and 500 are presumably the walk probability and the
    # iteration cap -- confirm against ``personal_rank``.
    print("------------ 3.PersonalRank --------------")
    rank = personal_rank(graph, 0.85, root, 500)

    # Step 4: recommend items based on the rank result.
    print("------------ 4.recommend -------------")
    print(recommend(graph, rank, root))
コード例 #7
0
def TestOfLR():
    """
    Test the logistic regression algorithm end to end.

    Runs ``lr_train.TrainOfLR()`` first, then loads the "weights" model,
    loads "test_data", predicts with ``predict`` and saves the output under
    the name "result".

    :return: None
    """
    # Training runs first; the weights are loaded from the "weights" model
    # right after it.
    lr_train.TrainOfLR()

    print("------------------------1. Load Model-----------------------")
    with FTool.LoadModel("weights") as weight_file:
        weights = weight_file.load_model_mul()
    # Size of the second axis of the weights, passed to the loader as
    # ``number``.
    column_count = np.shape(weights)[1]

    print("------------------------2. Load Data-----------------------")
    test_loader = FTool.LoadData(file_name="test_data", feature_type="float")
    samples = test_loader.load_data_with_limit(number=column_count, offset=1)

    print("------------------------3. Get Prediction-----------------------")
    predictions = predict(samples, weights)

    # TODO painting
    print("------------------------4. Save Prediction-----------------------")
    save_result("result", predictions)
コード例 #8
0
def cart_train():
    """
    Train a CART regression tree on "sine.txt", evaluate it and save it.

    The samples are plotted, the tree is built by ``build_tree``, its error
    is computed by ``cal_error`` on the training data, and the tree is saved
    under the name "regression_tree".

    :return: None
    """
    # Step 1: load the training data as plain lists.
    print("----------- 1、load data -------------")
    loader = FTool.LoadData(file_name="sine.txt")
    samples, targets, _ = loader.load_data(
        need_label_length=True, need_list=True, feature_end=0)

    # Plot the first column of every sample against the labels.
    first_column = [row[0] for row in samples]
    with FTool.PaintingWithList(name="cart_train Train") as canvas:
        canvas.painting_simple_list(first_column, targets)

    # Step 2: build the CART tree.
    # NOTE(review): 30 and 0.3 are presumably the minimum sample count and
    # the minimum error used as stopping rules -- confirm against
    # ``build_tree``.
    print("----------- 2、build cart_train ------------")
    regression_tree = build_tree(samples, 30, 0.3)

    # Step 3: evaluate the tree on the training data.
    print("----------- 3、cal err -------------")
    err = cal_error(samples, regression_tree)
    print("\t--------- err : ", err)

    # Step 4: save the final CART model.
    print("----------- 4、save result -----------")
    save_model(regression_tree, "regression_tree")