Exemple #1
0
def top_N_except_pop(file):
    """
    Top-N accuracy with popular attractions excluded from the test cases.

    Each line of *file* is tab-separated: user, classroute, answer,
    recommendation dict. A case counts toward the total only when its
    answer is not in POPULAR_ATTR.

    :param file: path of the recommendation-result file
    :return: None; prints "top_n: accuracy" for n in 1..220
    """
    # Parse the file once instead of re-opening and re-parsing it for
    # every n (the original leaked 220 file handles).
    cases = []
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            s_lst = line.strip().split('\t')
            answer = AName(s_lst[2])
            # The dict was dumped with single quotes; normalize for json.
            predict_lst = list(json.loads(s_lst[3].replace("'", '"')).keys())
            cases.append((answer, predict_lst))

    for n in range(1, 221):
        right = 0
        total = 0
        for answer, predict_lst in cases:
            # Skip popular attractions.
            if answer in POPULAR_ATTR:
                continue
            total += 1
            # NOTE(review): answer already went through AName once; the second
            # AName call is kept to preserve the original behavior.
            if AName(answer) in predict_lst[:n]:
                right += 1
        print("top_%s: %s" % (n, right / total))
Exemple #2
0
def av_position_except_pop(file):
    """
    Average normalized rank of the correct attraction in the recommendation
    lists, with popular attractions excluded from the evaluation.

    :param file: path of the recommendation-result file
    :return: None; prints the per-case position list and its mean
    """
    pos_lst = list()
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            s_lst = line.strip().split('\t')
            answer = AName(s_lst[2])
            predict_lst = list(json.loads(s_lst[3].replace("'", '"')).keys())

            # Popular attractions are not evaluated.
            if answer in POPULAR_ATTR:
                continue

            try:
                pos = predict_lst.index(AName(answer))
                pos_lst.append(pos / len(predict_lst))
            except ValueError:
                # Answer missing from the list: worst possible position.
                pos_lst.append(1)

    print(pos_lst)

    av_pos = sum(pos_lst) / len(pos_lst)
    print(av_pos)
Exemple #3
0
def init_matrix():
    """
    Build the complex adjacency matrix from the training data.

    Visitor -> attraction edges are +1j, attraction -> visitor edges are
    -1j, indexed by position in gen_nodes_list(). Test routes contribute
    all but their last (held-out) attraction.

    :return: np.matrix of shape (n_nodes, n_nodes) with complex entries
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Matrix dimension = number of visitor + attraction nodes.
    nodes_list = gen_nodes_list()
    n = len(nodes_list)
    # np.complex was removed in NumPy 1.24; a plain complex dtype and the
    # 1j literal are the supported equivalents.
    matrix = np.mat(np.zeros((n, n), dtype=complex))

    sql = "select * from public.train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Fill in the training routes (complete routes).
    for row in rows:
        classroute = row[2]
        user_id = VName(row[7])
        user_index = nodes_list.index(user_id)

        for att_id in classroute:
            att = AName(att_id)
            att_index = nodes_list.index(att)

            matrix[user_index, att_index] = 1j
            matrix[att_index, user_index] = -1j

    # Fill in the test routes, excluding the held-out last attraction.
    # (The original re-ran the train_set query here and discarded it.)
    sql = "select * from public.test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    for row in rows:
        classroute = row[2]
        user_id = VName(row[7])
        user_index = nodes_list.index(user_id)

        for att_id in classroute[:-1]:
            att = AName(att_id)
            att_index = nodes_list.index(att)

            matrix[user_index, att_index] = 1j
            matrix[att_index, user_index] = -1j

    # Release DB resources (the original leaked them).
    cursor.close()
    conn.close()
    return matrix
Exemple #4
0
def novelty(file):
    """
    Mean popularity (node degree) of the recommended items, for list
    lengths 1..50.

    :param file: recommendation-result file, one tab-separated case per line
    :return: None; prints "n average_popularity" for each list length
    """
    all_list = []
    with open(file, "r", encoding="utf-8") as fh:
        for raw in fh:
            fields = raw.strip().split('\t')
            answer = AName(fields[2])  # parsed like the sibling metrics; unused here
            ranked = json.loads(fields[3].replace("'", '"'))
            all_list.append(list(ranked.keys()))

    a_degree = att_degree()
    # print(a_degree)

    case_count = len(all_list)
    for top_n in range(1, 51):
        nov = 0
        for ranked in all_list:
            for att in ranked[:top_n]:
                nov += a_degree[att] / top_n
        print(top_n, nov / case_count)
Exemple #5
0
def coverage(file):
    """
    推荐系统推荐给所有用户的物品数占总物品数的比例
    :param file:
    :return:
    """
    f = open(file, "r", encoding="utf-8")
    # 保存所有的推荐结果
    all_list = list()
    for line in f:
        line = line.strip()
        s_lst = line.split('\t')
        answer = AName(s_lst[2])
        predict_lst = s_lst[3]
        predict_lst = predict_lst.replace("'", '"')
        predict_lst = json.loads(predict_lst)
        predict_lst = list(predict_lst.keys())
        all_list.append(predict_lst)
    f.close()

    for i in range(1, 51):
        all_rec_items = set()
        for j in range(len(all_list)):
            all_rec_items = all_rec_items.union(set(all_list[j][:i]))
        print(i, len(all_rec_items) / 221)
Exemple #6
0
def inter_diversity(file):
    """
    Inter-user diversity: the average pairwise dissimilarity
    1 - |top_i ∩ top_j| / L over all ordered user pairs, for L in 1..50.

    :param file: recommendation-result file, one tab-separated case per line
    :return: None; prints "L diversity" for each list length
    """
    all_list = []
    with open(file, "r", encoding="utf-8") as fh:
        for raw in fh:
            fields = raw.strip().split('\t')
            answer = AName(fields[2])  # parsed like the sibling metrics; unused here
            ranked = json.loads(fields[3].replace("'", '"'))
            all_list.append(list(ranked.keys()))

    user_count = len(all_list)
    for L in range(1, 51):
        # Truncate each list to a set once per L instead of once per pair.
        tops = [set(lst[:L]) for lst in all_list]
        diver = 0
        for i in range(user_count):
            for j in range(user_count):
                if i == j:
                    continue
                diver += 1 - len(tops[i].intersection(tops[j])) / L
        print(L, diver / (user_count * (user_count - 1)))
Exemple #7
0
def test():
    """
    Run the user-based CF prediction over the whole test set and write
    one tab-separated result line per user to recommend_20.txt.

    :return: None
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Build the user/item lookup structures once up front.
    user_dict, item_user = form_data()
    print("初始化矩阵完成")
    print(time.time())

    cursor.execute("select * from public.test_set")
    rows = cursor.fetchall()

    out = open("recommend_20.txt", "w", encoding="utf-8")
    for done, row in enumerate(rows, start=1):
        # Progress marker every 100 users.
        if done % 100 == 0:
            print("完成:" + str(done))

        user = row[7]
        classroute = row[2]

        # Recommend from the 20 nearest neighbours.
        rec_dict = recommend_by_user(user_dict, item_user, user, 20)
        out.write("%s\t%s\t%s\t%s\n" %
                  (user, classroute[:-1], AName(classroute[-1]),
                   json.dumps(rec_dict)))

    out.close()
    cursor.close()
    conn.close()
Exemple #8
0
def recommend_by_user(user_dict, item_user, user, k=10):
    """
    Score unvisited items for *user* from the k nearest neighbours.

    :param user_dict: user -> {item: rating}
    :param item_user: item -> users who rated it
    :param user: target user
    :param k: number of nearest neighbours to use
    :return: dict item-name -> predicted score, best first
    """
    # Keep only the k closest neighbours (gen_near_neighbors is ordered).
    neighbors = dict(
        list(gen_near_neighbors(user_dict, item_user, user).items())[:k])

    visited = list(user_dict[user].keys())
    candidates = set(item_user.keys()) - set(visited)

    scores = dict()
    for item in candidates:
        numerator = 0
        denominator = 0
        for neighbor, similarity in neighbors.items():
            ratings = user_dict[neighbor]
            if item in ratings.keys():
                numerator += similarity * ratings[item]
            denominator += similarity
        # Only items at least one neighbour rated get a score.
        if numerator != 0:
            scores[AName(item)] = numerator / denominator

    return dict(sorted(scores.items(), key=lambda kv: kv[1], reverse=True))
Exemple #9
0
def gen_nodes_list():
    """
    Build the ordered list of all graph nodes: visitor nodes first
    (from route_0320), then attraction nodes (from node_1023).

    :return: list of VName(...)/AName(...) node labels
    """
    conn = get_conn()
    cursor = conn.cursor()

    nodes_list = list()

    # Visitor nodes.
    cursor.execute("select * from public.route_0320")
    nodes_list.extend(VName(row[7]) for row in cursor.fetchall())

    # Attraction nodes.
    cursor.execute("select * from public.node_1023")
    nodes_list.extend(AName(row[0]) for row in cursor.fetchall())

    # Release DB resources (the original leaked them).
    cursor.close()
    conn.close()
    return nodes_list
def get_dis_feature(node_loc_dict, classroute, att_id):
    """
    Distance features between a candidate attraction and a visit history.

    :param node_loc_dict: AName(att) -> (lon, lat)
    :param classroute: sequence of visited attraction ids (raw, un-AName'd)
    :param att_id: candidate attraction key (already AName'd)
    :return: [min, mean, max, last] haversine distances; zeros if no history
    """
    # No history: every feature degenerates to 0.
    if not classroute:
        return [0, 0, 0, 0]

    att_loc = node_loc_dict[att_id]

    # Haversine distance from every visited attraction to the candidate.
    dists = []
    for visited in classroute:
        loc = node_loc_dict[AName(visited)]
        dists.append(haversine(loc[0], loc[1], att_loc[0], att_loc[1]))

    # The route's last element is the most recent visit.
    return [min(dists), sum(dists) / len(dists), max(dists), dists[-1]]
Exemple #11
0
def top_N(file):
    """
    Top-N accuracy over all test cases in *file*.

    Each line is tab-separated: user, classroute, answer, recommendation dict.

    :param file: path of the recommendation-result file
    :return: None; prints "top_n: accuracy" for n in 1..220
    """
    # Parse the file once instead of re-opening and re-parsing it for
    # every n (the original leaked 220 file handles).
    cases = []
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            s_lst = line.strip().split('\t')
            answer = AName(s_lst[2])
            # The dict was dumped with single quotes; normalize for json.
            predict_lst = list(json.loads(s_lst[3].replace("'", '"')).keys())
            cases.append((answer, predict_lst))

    for n in range(1, 221):
        right = 0
        for answer, predict_lst in cases:
            # NOTE(review): answer already went through AName once; the second
            # AName call is kept to preserve the original behavior.
            if AName(answer) in predict_lst[:n]:
                right += 1
        print("top_%s: %s" % (n, right / len(cases)))
Exemple #12
0
def cal_precision(result, n):
    """
    Precision of the top-n recommendations.

    :param result: dict user_id -> {"answer": ..., "recommend": ordered dict}
    :param n: list length to evaluate
    :return: fraction of users whose answer appears in their top-n list;
        0.0 for an empty result set (the original raised ZeroDivisionError)
    """
    if not result:
        return 0.0
    right = sum(
        1 for info in result.values()
        if AName(info["answer"]) in list(info["recommend"].keys())[:n])
    return right / len(result)
Exemple #13
0
def predict(extract_fun):
    """
    Predict link existence for every row of the ML test set with the
    model previously saved by train().

    :param extract_fun: feature extractor; dispatched on its __name__
        ("extract_direct" or "extract_indirect")
    :return: None; writes one tab-separated line
        (user, attraction, true label, prediction) per row to
        predict_<extractor>.txt
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Restore the trained classifier for this extractor.
    clf = joblib.load("model_" + extract_fun.__name__ + ".pkl")

    # Bipartite visitor/attraction network.
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # Project onto the visitor side and the attraction side.
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())

    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    f = open("predict_" + extract_fun.__name__ + ".txt", "w", encoding="utf-8")
    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]
        # NOTE(review): if extract_fun is neither of these two names,
        # `feature` stays unbound and the predict call below raises.
        if extract_fun.__name__ == "extract_direct":
            feature = extract_direct(graph, user_id, att_id)
        elif extract_fun.__name__ == "extract_indirect":
            feature = extract_indirect(graph, prjv_graph, prja_graph, user_id,
                                       att_id)

        result = clf.predict([feature])[0]
        f.writelines("%s\t%s\t%s\t%s\n" % (user_id, att_id, is_link, result))

    f.close()
    cursor.close()
    conn.close()
def get_node_loc_dict():
    """
    Map attraction id to its coordinates.

    :return: dict AName(att_id) -> (longitude, latitude)
    """
    conn = get_conn()
    cursor = conn.cursor()

    cursor.execute("select * from public.node_1023")
    rows = cursor.fetchall()

    # Columns: 0 = attraction id, 2 = longitude, 3 = latitude.
    node_loc_dict = {AName(row[0]): (row[2], row[3]) for row in rows}

    # Release DB resources (the original leaked them).
    cursor.close()
    conn.close()
    return node_loc_dict
def cal_train_distance(save_dis_file="train_distance.csv"):
    """
    Compute the distance features for every row of the ML training set
    and save them as a CSV file.

    :param save_dis_file: output CSV path for the distance features
    :return: None
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Attraction id -> (lon, lat) lookup.
    node_loc_dict = get_node_loc_dict()

    sql = "select * from public.ml_train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    distance = []
    title = ["min_d", "mean_d", "max_d", "last_d"]

    for row in rows:
        # NOTE(review): user_id is computed but never used below.
        user_id = VName(row[0])
        att_id = AName(row[1])

        # NOTE(review): SQL built with str.format; row[0] comes from our
        # own DB so injection risk is low, but a parameterized query
        # would be safer — confirm the driver's placeholder style.
        sql = "select * from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        item = cursor.fetchone()
        # Drop the held-out last attraction from the route.
        classroute = item[2][:-1]

        dis_feature = get_dis_feature(node_loc_dict, classroute, att_id)
        distance.append(dis_feature)

    # Persist as CSV.
    df = pd.DataFrame(distance, columns=title)
    df.to_csv(save_dis_file, encoding="utf-8")
def recommend(test_file, model_path, save_file):
    """
    Produce recommendation lists with a trained xgboost model.

    :param test_file: CSV of test features ("anode" column = candidate id)
    :param model_path: path of the saved xgboost Booster
    :param save_file: output path for the tab-separated recommendation lines
    :return: None
    """
    # Load the model.
    model = xgb.Booster()
    model.load_model(model_path)
    # print(dir(model))

    # Build the test matrix (drop the candidate-id column first).
    test_data = pd.read_csv(test_file)
    anode_list = list(test_data["anode"])
    test_set = test_data.drop("anode", axis=1)
    # Wrap in an xgb DMatrix.
    xgb_test = xgb.DMatrix(test_set)

    # One score per candidate row, in the same order as anode_list.
    preds = model.predict(xgb_test)

    conn = get_conn()
    cursor = conn.cursor()
    sql = "select * from public.ml_test_set "
    cursor.execute(sql)
    rows = cursor.fetchall()

    # All attraction nodes.
    a_nodes = list(get_node_id_dict().keys())

    # `index` walks through preds/anode_list; the feature file was
    # generated in the same positive-case order as this loop.
    index = 0
    f = open(save_file, "w", encoding="utf-8")
    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        # Negative test cases are skipped (no features were written for them).
        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Candidate set: every attraction not already visited.
        left_set = set(a_nodes) - set(classroute[0:-1])
        n = len(left_set)

        recommend_list = dict()
        for i in range(n):
            recommend_list[anode_list[index+i]] = preds[index+i]
        index += n

        # Rank candidates by predicted score, best first.
        recommend_list = dict(sorted(recommend_list.items(), key=lambda x:x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" % (user_id, classroute[:-1], att_id, json.dumps(recommend_list, cls=MyEncoder)))

    f.close()
    cursor.close()
    conn.close()
def write_test_feature(func, has_sd=0):
    """
    Extract features for every candidate attraction of each positive
    test case and save them ("anode" + features) to a CSV file.

    :param func: feature extractor, "extract_direct" or "extract_indirect"
        (dispatched on __name__)
    :param has_sd: for the indirect extractor, whether the projected
        shortest-distance features are included
    :return: None
    """
    conn = get_conn()
    cursor = conn.cursor()

    if has_sd:
        file_name = func.__name__ + "_has_sd_test.csv"
    else:
        file_name = func.__name__ + "_test.csv"

    # Column titles depend on the extractor (and on has_sd for indirect).
    if func.__name__ == "extract_direct":
        title = ["anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    elif func.__name__ == "extract_indirect":
        if has_sd:
            title = [
                "anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa", "prj_sdv", "prj_sda"
            ]
        else:
            title = [
                "anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa"
            ]
    else:
        print("函数错误")
        return

    # Read the test set.
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Bipartite visitor/attraction network.
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # Projections onto each node class.
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())
    # All attraction nodes (rebinds a_nodes from the projection step).
    a_nodes = list(get_node_id_dict().keys())

    test_f = []
    i = 0
    for row in rows:
        print(i)
        i += 1

        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        # Only positive cases get a candidate list.
        if not is_link:
            continue

        # NOTE(review): SQL built with str.format from our own DB data;
        # a parameterized query would be safer.
        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Candidate set: every attraction not already visited.
        left_set = set(a_nodes) - set(classroute[0:-1])

        for anode in left_set:
            anode = AName(anode)
            if func.__name__ == "extract_direct":
                feature = func(graph, user_id, anode)
            elif func.__name__ == "extract_indirect":
                feature = func(graph, prjv_graph, prja_graph, user_id, anode,
                               has_sd)
            else:
                # Unreachable: func.__name__ was validated above.
                print("函数名错误")

            line = [anode]
            line.extend(feature)
            test_f.append(line)

    # Persist as CSV.
    df = pd.DataFrame(test_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    print("测试特征保存完成")
def write_train_feature(func, have_sd=0):
    """
    Extract features for every row of the ML training set and save them
    (label + features) to a CSV file.

    :param func: feature extractor, "extract_direct" or "extract_indirect"
        (dispatched on __name__)
    :param have_sd: for the indirect extractor, whether the projected
        shortest-distance features are included
    :return: None
    """
    if have_sd:
        file_name = func.__name__ + "_has_sd_train.csv"
    else:
        file_name = func.__name__ + "_train.csv"

    # Read data.
    conn = get_conn()
    cursor = conn.cursor()

    # Bipartite visitor/attraction network.
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # Projections onto each node class.
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("网络投影完成")
    print(time.time())

    sql = "select * from public.ml_train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    print("len_rows:" + str(len(rows)))

    # Column titles depend on the extractor (and on have_sd for indirect).
    if func.__name__ == "extract_direct":
        title = ["label", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    elif func.__name__ == "extract_indirect":
        if have_sd:
            title = [
                "label", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa", "prj_sdv", "prj_sda"
            ]
        else:
            title = [
                "label", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa"
            ]
    else:
        print("函数错误")
        return

    i = 0
    train_f = []
    for row in rows:
        i += 1
        print(i)

        user_id = VName(row[0])
        att_id = AName(row[1])

        # func.__name__ was validated above, so exactly one branch runs
        # (the original carried an unreachable else here).
        if func.__name__ == "extract_direct":
            feature = func(graph, user_id, att_id)
        else:
            feature = func(graph, prjv_graph, prja_graph, user_id, att_id,
                           have_sd)

        # Label 1 for an existing link, 0 otherwise (the original had two
        # duplicated append branches).
        line = [1 if row[4] else 0]
        line.extend(feature)
        train_f.append(line)

    # Persist as CSV.
    df = pd.DataFrame(train_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    print("训练特征保存完成")
Exemple #19
0
def train(extract_fun):
    """
    Train a linear-kernel SVM link-prediction model and persist it.

    :param extract_fun: feature extractor; dispatched on __name__
        ("extract_direct" or "extract_indirect")
    :return: None; saves param_<name>.txt (training data as two json
        lines) and model_<name>.pkl (fitted classifier)
    """
    # Read data.
    conn = get_conn()
    cursor = conn.cursor()

    # Bipartite visitor/attraction network.
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # Projections onto each node class.
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("网络投影完成")
    print(time.time())

    sql = "select * from public.ml_train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    print(len(rows))

    # Training data.
    X_list = list()
    Y_list = list()

    i = 0
    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])

        i += 1
        print(i)
        # print(time.time())

        if extract_fun.__name__ == "extract_direct":
            feature = extract_direct(graph, user_id, att_id)
        elif extract_fun.__name__ == "extract_indirect":
            feature = extract_indirect(graph, prjv_graph, prja_graph, user_id,
                                       att_id)
            # print(feature)
        else:
            print("wrong function")
            break
        X_list.append(feature)
        # NOTE(review): labels are 1/-1 here while write_train_feature
        # uses 1/0 — confirm downstream consumers expect this difference.
        if row[4]:
            Y_list.append(1)
        else:
            Y_list.append(-1)
    print("生成训练数据")
    print(time.time())

    cursor.close()
    conn.close()

    # Save X_list / Y_list so tuning runs (recommend_test) can reuse them.
    f = open("param_" + extract_fun.__name__ + ".txt", "w", encoding="utf-8")
    f.writelines(json.dumps(X_list) + "\n")
    f.writelines(json.dumps(Y_list) + "\n")
    f.close()
    print("训练数据保存成功")
    print(time.time())

    clf = svm.SVC(kernel="linear")
    clf.fit(X_list, Y_list)
    print("训练数据结束")
    print(time.time())

    joblib.dump(clf, "model_" + extract_fun.__name__ + ".pkl")
    print("保存模型")
    print(time.time())
Exemple #20
0
def recommend_test(extract_fun, tuned_params):
    """
    Re-train an SVM with the parameters found by GridSearchCV and write
    the resulting recommendation lists, to validate the tuning.

    :param extract_fun: feature extractor; dispatched on __name__
        ("extract_direct" or "extract_indirect")
    :param tuned_params: dict with "kernel", "C" and "gamma"
    :return: None; writes one result line per positive test case
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Read the test set.
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Build the model with the tuned hyper-parameters.
    clf = svm.SVC(kernel=tuned_params["kernel"],
                  C=tuned_params["C"],
                  gamma=tuned_params["gamma"])
    # Reload the training data saved by train(): one json list per line.
    # (The original also called x_list.split()/y_list.split() and threw
    # the results away — dead code, removed.)
    with open("param_" + extract_fun.__name__ + ".txt", "r",
              encoding="utf-8") as f:
        x_list = json.loads(f.readline())
        y_list = json.loads(f.readline())
    clf.fit(x_list, y_list)

    # Bipartite visitor/attraction network and its projections.
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())
    # All attraction nodes (rebinds a_nodes from the projection step).
    a_nodes = list(get_node_id_dict().keys())

    # Result file named after the tuned parameters.
    f = open("recommend_" + extract_fun.__name__ + "_" +
             tuned_params["kernel"] + "_C" + str(tuned_params["C"]) +
             "_gamma" + str(tuned_params["gamma"]) + ".txt",
             "w",
             encoding="utf-8")

    i = 0
    for row in rows:
        i += 1
        print(i)
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        # Only positive cases get a recommendation list.
        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Candidate set: every attraction not already visited.
        left_set = set(a_nodes) - set(classroute[0:-1])

        recommendation = dict()
        for anode in left_set:
            anode = AName(anode)
            if extract_fun.__name__ == "extract_direct":
                feature = extract_direct(graph, user_id, anode)
            elif extract_fun.__name__ == "extract_indirect":
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, anode)

            # Keep only predicted links, scored by the decision margin.
            result = clf.predict([feature])[0]
            dis = abs(clf.decision_function([feature]))
            if result == 1:
                recommendation[anode] = dis[0]

        recommendation = dict(
            sorted(recommendation.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" %
                (user_id, classroute[:-1], att_id, json.dumps(recommendation)))

    f.close()
    cursor.close()
    conn.close()
Exemple #21
0
def recommend_list(extract_fun):
    """
    Produce recommendation lists with the SVM model saved by train().

    NOTE(review): a later definition in this module reuses the name
    recommend_list and shadows this one at import time.

    :param extract_fun: feature extractor; dispatched on __name__
        ("extract_direct" or "extract_indirect")
    :return: None; writes recommend_<extractor>.txt
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Read the test set.
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Restore the trained classifier.
    clf = joblib.load("model_" + extract_fun.__name__ + ".pkl")

    # Bipartite visitor/attraction network.
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # Projections onto each node class.
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())
    # All attraction nodes (rebinds a_nodes from the projection step).
    a_nodes = list(get_node_id_dict().keys())

    # Result file.
    f = open("recommend_" + extract_fun.__name__ + ".txt",
             "w",
             encoding="utf-8")

    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        # Only positive cases get a recommendation list.
        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Candidate set: every attraction not already visited.
        left_set = set(a_nodes) - set(classroute[0:-1])

        recommendation = dict()
        for anode in left_set:
            anode = AName(anode)
            if extract_fun.__name__ == "extract_direct":
                feature = extract_direct(graph, user_id, anode)
            elif extract_fun.__name__ == "extract_indirect":
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, anode)

            # Keep only predicted links, scored by the decision margin.
            result = clf.predict([feature])[0]
            dis = abs(clf.decision_function([feature]))
            if result == 1:
                recommendation[anode] = dis[0]

        recommendation = dict(
            sorted(recommendation.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" %
                (user_id, classroute[:-1], att_id, json.dumps(recommendation)))

    f.close()
    cursor.close()
    conn.close()
Exemple #22
0
def recommend_list(route_length=3, coeffs=None):
    """
    Build recommendation lists from odd powers of the complex adjacency
    matrix; the imaginary part of final_matrix[user, att] scores a
    candidate, and every non-zero score is kept and ranked descending.

    :param route_length: maximum odd path length used: 3, 5 or 7
    :param coeffs: optional divisors for the higher matrix powers
        (defaults: 20 for length 5; 120 and 5040 for length 7). The
        mutable [] default was replaced with None — same falsy behavior,
        without the shared-default-argument pitfall.
    :return: dict user_id -> {"user_id", "classroute", "answer",
        "recommend"}, or None when route_length has no formula
    """
    nodes_list = gen_nodes_list()
    matrix = init_matrix()

    conn = get_conn()
    cursor = conn.cursor()

    if route_length == 3:
        final_matrix = matrix ** 3
    elif route_length == 5:
        final_matrix = (matrix ** 3) + (matrix ** 5) / (coeffs[0] if coeffs else 20)
    elif route_length == 7:
        if coeffs:
            final_matrix = ((matrix ** 3) + (matrix ** 5) / coeffs[0]
                            + (matrix ** 7) / coeffs[1])
        else:
            # NOTE(review): default divisors here are 120 = 5! and
            # 5040 = 7!, while the length-5 default above is 20 — confirm
            # the intended damping scheme.
            final_matrix = (matrix ** 3) + (matrix ** 5) / 120 + (matrix ** 7) / 5040
    else:
        print("暂时没有对应的公式")
        cursor.close()
        conn.close()
        return

    sql = "select * from public.test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    file_name = "cn_route_" + str(route_length) + ".txt"
    f = open(file_name, "w", encoding="utf-8")

    # All attraction nodes.
    a_nodes = list(get_node_id_dict().keys())
    # Per-user recommendation results.
    record_result = dict()
    for row in rows:
        classroute = row[2]
        user_id = VName(row[7])
        user_index = nodes_list.index(user_id)

        # Candidates: attractions not already visited (last one held out).
        left_nodes = list(set(a_nodes) - set(classroute[:-1]))
        result_dict = dict()
        for node in left_nodes:
            node = AName(node)
            att_index = nodes_list.index(node)

            imag_coeff = final_matrix[user_index, att_index].imag
            # Zero imaginary part means no odd-length path connects them.
            if imag_coeff != 0.0:
                result_dict[node] = imag_coeff

        # Rank by score, best first.
        result_dict = dict(
            sorted(result_dict.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" % (user_id, classroute[:-1],
                                      AName(classroute[-1]),
                                      json.dumps(result_dict)))
        # NOTE(review): "classroute" stores only the held-out last element;
        # classroute[:-1] may have been intended — confirm with consumers.
        record_result[user_id] = {"user_id": user_id,
                                  "classroute": classroute[-1],
                                  "answer": AName(classroute[-1]),
                                  "recommend": result_dict}

    # Close resources (the original leaked the file and DB handles).
    f.close()
    cursor.close()
    conn.close()
    return record_result