Example #1
def init_set():
    """
    Fetch data from the route table and use it to
    initialize the training and test sets.
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Node dictionary
    node_dict = get_node_id_dict()

    # Get the row count
    count_sql = r"select count(*) from public.route"
    cursor.execute(count_sql)
    number = cursor.fetchone()[0]
    print(number)

    # Fetch all rows
    select_sql = r"select * from public.route order by id"
    cursor.execute(select_sql)
    rows = cursor.fetchall()

    train_number = int(0.9*number)
    test_number = number - train_number
    train_set = random.sample(rows, train_number)

    for row in rows:
        classroute = row[2]
        classroutestr = row[3]
        routetime = row[4]
        id = row[7]
        if id_in_set(train_set, row[7]):
            for i in range(len(classroute)):
                insert_sql = """
                            insert into public.md_train_set(user_id, attraction, attractionstr, visittime) 
                            values(%s, %s, %s, %s);
                    """
                cursor.execute(insert_sql, (id, classroute[i], node_dict[classroute[i]], routetime[i]))
        else:
            for i in range(len(classroute) - 1):
                insert_sql = """
                            insert into public.md_train_set(user_id, attraction, attractionstr, visittime) 
                            values(%s, %s, %s, %s);
                    """
                cursor.execute(insert_sql, (id, classroute[i], node_dict[classroute[i]], routetime[i]))
            insert_sql = """
                        insert into public.md_test_set(user_id, attraction, attractionstr, visittime) 
                            values(%s, %s, %s, %s);
            """
            cursor.execute(insert_sql, (id, classroute[-1], node_dict[classroute[-1]], routetime[-1]))
        conn.commit()

    cursor.close()
    conn.close()
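A minimal sketch of the id_in_set helper this example relies on but does not show; it is assumed to check whether a row with the given id appears in the sampled subset:

def id_in_set(sample_rows, row_id):
    """Hypothetical helper: True if any sampled row has the given id (column 7)."""
    return any(r[7] == row_id for r in sample_rows)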
Example #2
def recommend_test(extract_fun, tuned_params):
    """
    Validate the tuning result using the parameters found by GridSearchCV.
    :param tuned_params:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Read data
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Build the model
    clf = svm.SVC(kernel=tuned_params["kernel"],
                  C=tuned_params["C"],
                  gamma=tuned_params["gamma"])
    f = open("param_" + extract_fun.__name__ + ".txt", "r", encoding="utf-8")
    x_list = f.readline()
    x_list.split()
    y_list = f.readline()
    y_list.split()
    f.close()
    x_list = json.loads(x_list)
    y_list = json.loads(y_list)
    clf.fit(x_list, y_list)

    # Bipartite network
    graph = init_graph()
    print("Bipartite network built")
    print(time.time())
    # Project onto each node set
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("Projection done")
    print(time.time())
    # All attractions
    a_nodes = list(get_node_id_dict().keys())

    # Record the result data
    f = open("recommend_" + extract_fun.__name__ + "_" +
             tuned_params["kernel"] + "_C" + str(tuned_params["C"]) +
             "_gamma" + str(tuned_params["gamma"]) + ".txt",
             "w",
             encoding="utf-8")

    i = 0
    for row in rows:
        i += 1
        print(i)
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Set of attractions to predict
        left_set = set(a_nodes) - set(classroute[0:-1])

        recommendation = dict()
        for anode in left_set:
            anode = AName(anode)
            if extract_fun.__name__ == "extract_direct":
                feature = extract_direct(graph, user_id, anode)
            elif extract_fun.__name__ == "extract_indirect":
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, anode)

            result = clf.predict([feature])[0]
            dis = abs(clf.decision_function([feature]))
            if result == 1:
                recommendation[anode] = dis[0]

        recommendation = dict(
            sorted(recommendation.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" %
                (user_id, classroute[:-1], att_id, json.dumps(recommendation)))

    f.close()
    cursor.close()
    conn.close()
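A usage sketch (values are illustrative, not from the source): tuned_params would normally be the best_params_ found by GridSearchCV during tuning.

# Hypothetical call; kernel, C and gamma are illustrative
best_params = {"kernel": "rbf", "C": 10, "gamma": 0.01}
recommend_test(extract_direct, best_params)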
Example #3
def recommend_list(extract_fun):
    """
    Make recommendations using the previously generated model.
    :param extract_fun:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Read data
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Load the model
    clf = joblib.load("model_" + extract_fun.__name__ + ".pkl")

    # Bipartite network
    graph = init_graph()
    print("Bipartite network built")
    print(time.time())
    # Project onto each node set
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("Projection done")
    print(time.time())
    # All attractions
    a_nodes = list(get_node_id_dict().keys())

    # Record the result data
    f = open("recommend_" + extract_fun.__name__ + ".txt",
             "w",
             encoding="utf-8")

    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Set of attractions to predict
        left_set = set(a_nodes) - set(classroute[0:-1])

        recommendation = dict()
        for anode in left_set:
            anode = AName(anode)
            if extract_fun.__name__ == "extract_direct":
                feature = extract_direct(graph, user_id, anode)
            elif extract_fun.__name__ == "extract_indirect":
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, anode)

            result = clf.predict([feature])[0]
            dis = abs(clf.decision_function([feature]))
            if result == 1:
                recommendation[anode] = dis[0]

        recommendation = dict(
            sorted(recommendation.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" %
                (user_id, classroute[:-1], att_id, json.dumps(recommendation)))

    f.close()
    cursor.close()
    conn.close()
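The pickle loaded above is assumed to have been written by an earlier training step; a sketch of what that step might look like (features, labels, and parameters are placeholders, not from the source):

# Hypothetical training step that would produce model_extract_direct.pkl
x_train = [[0.1, 0.2], [0.3, 0.4]]   # placeholder feature vectors
y_train = [0, 1]                     # placeholder labels
clf = svm.SVC(kernel="rbf", C=10, gamma=0.01)
clf.fit(x_train, y_train)
joblib.dump(clf, "model_" + extract_direct.__name__ + ".pkl")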
Example #4
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import sys
sys.path.append("../")
from postgresql import get_conn, get_node_id_dict

if __name__=="__main__":
    conn = get_conn()
    cursor = conn.cursor()

    print(get_node_id_dict())
Example #5
def write_test_feature(func, has_sd=0):
    """
    Save the feature vectors of the test set.
    :param func:
    :param has_sd:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    if has_sd:
        file_name = func.__name__ + "_has_sd_test.csv"
    else:
        file_name = func.__name__ + "_test.csv"

    if func.__name__ == "extract_direct":
        title = ["anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    elif func.__name__ == "extract_indirect":
        if has_sd:
            title = [
                "anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa", "prj_sdv", "prj_sda"
            ]
        else:
            title = [
                "anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa"
            ]
    else:
        print("函数错误")
        return

    # Read data
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # Bipartite network
    graph = init_graph()
    print("Bipartite network built")
    print(time.time())
    # Project onto each node set
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("Projection done")
    print(time.time())
    # All attractions
    a_nodes = list(get_node_id_dict().keys())

    test_f = []
    i = 0
    for row in rows:
        print(i)
        i += 1

        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Set of attractions to predict
        left_set = set(a_nodes) - set(classroute[0:-1])

        for anode in left_set:
            anode = AName(anode)
            if func.__name__ == "extract_direct":
                feature = func(graph, user_id, anode)
            elif func.__name__ == "extract_indirect":
                feature = func(graph, prjv_graph, prja_graph, user_id, anode,
                               has_sd)
            else:
                print("函数名错误")

            line = [anode]
            line.extend(feature)
            test_f.append(line)

    # Write the features to a CSV file
    df = pd.DataFrame(test_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    cursor.close()
    conn.close()
    print("Test features saved")
Example #6
def init_set():
    """
    Read the train_set and test_set tables and use them to
    initialize ml_train_set, ml_test_set, and ml_graph_set.
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # Node dictionary
    node_dict = get_node_id_dict()
    all_nodes = list(node_dict.keys())

    # Get the row count
    count_sql = r"select count(*) from public.train_set"
    cursor.execute(count_sql)
    number = cursor.fetchone()[0]
    print(number)

    # Read from train_set and insert into ml_train_set and ml_graph_set
    sql = "select * from public.train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    train_number = int(0.2 * number)
    train_set = random.sample(rows, train_number)

    for row in rows:
        classroute = row[2]
        classroutestr = row[3]
        routetime = row[4]
        id = row[7]
        if not id_in_set(train_set, row[7]):
            for i in range(len(classroute)):
                insert_sql = """
                            insert into public.ml_graph_set(user_id, attraction, attractionstr, visittime, islink) 
                            values(%s, %s, %s, %s, %s);
                    """
                cursor.execute(insert_sql,
                               (id, classroute[i], node_dict[classroute[i]],
                                routetime[i], True))
        else:
            for i in range(len(classroute) - 1):
                insert_sql = """
                            insert into public.ml_graph_set(user_id, attraction, attractionstr, visittime, islink) 
                            values(%s, %s, %s, %s, %s);
                    """
                cursor.execute(insert_sql,
                               (id, classroute[i], node_dict[classroute[i]],
                                routetime[i], True))
            insert_sql = """
                        insert into public.ml_train_set(user_id, attraction, attractionstr, visittime, islink) 
                            values(%s, %s, %s, %s, %s);
            """
            cursor.execute(insert_sql,
                           (id, classroute[-1], node_dict[classroute[-1]],
                            routetime[-1], True))
            left_nodes = set(all_nodes) - set(classroute)
            # random.sample needs a sequence, so convert the set to a list
            neg_examples = random.sample(list(left_nodes), 2)
            for neg in neg_examples:
                cursor.execute(insert_sql,
                               (id, neg, node_dict[neg], None, False))

        conn.commit()

    # Read from test_set and insert into ml_test_set and ml_graph_set
    sql = "select * from public.test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    for row in rows:
        classroute = row[2]
        classroutestr = row[3]
        routetime = row[4]
        id = row[7]
        for i in range(len(classroute) - 1):
            insert_sql = """
                        insert into public.ml_graph_set(user_id, attraction, attractionstr, visittime, islink) 
                        values(%s, %s, %s, %s, %s);
                """
            cursor.execute(insert_sql,
                           (id, classroute[i], node_dict[classroute[i]],
                            routetime[i], True))
        insert_sql = """
                    insert into public.ml_test_set(user_id, attraction, attractionstr, visittime, islink) 
                        values(%s, %s, %s, %s, %s);
        """
        cursor.execute(insert_sql,
                       (id, classroute[-1], node_dict[classroute[-1]],
                        routetime[-1], True))
        left_nodes = set(all_nodes) - set(classroute)
        # random.sample needs a sequence, so convert the set to a list
        neg_examples = random.sample(list(left_nodes), 2)
        for neg in neg_examples:
            cursor.execute(insert_sql, (id, neg, node_dict[neg], None, False))

        conn.commit()

    cursor.close()
    conn.close()
Example #7
def recommend(test_file, model_path, save_file):
    """
    Use the model generated above to make predictions and recommendations.
    :param test_file:
    :param model_path:
    :param save_file:
    :return:
    """
    # Load the model
    model = xgb.Booster()
    model.load_model(model_path)
    # print(dir(model))

    # Build the test-set data
    test_data = pd.read_csv(test_file)
    anode_list = list(test_data["anode"])
    test_set = test_data.drop("anode", axis=1)
    # Build the XGBoost DMatrix
    xgb_test = xgb.DMatrix(test_set)

    # Run prediction
    preds = model.predict(xgb_test)

    conn = get_conn()
    cursor = conn.cursor()
    sql = "select * from public.ml_test_set "
    cursor.execute(sql)
    rows = cursor.fetchall()

    # All attraction nodes
    a_nodes = list(get_node_id_dict().keys())

    index = 0
    f = open(save_file, "w", encoding="utf-8")
    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        # Skip negative test cases
        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # Set of attractions to predict
        left_set = set(a_nodes) - set(classroute[0:-1])
        n = len(left_set)

        recommend_list = dict()
        for i in range(n):
            recommend_list[anode_list[index+i]] = preds[index+i]
        index += n

        recommend_list = dict(sorted(recommend_list.items(), key=lambda x:x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" % (user_id, classroute[:-1], att_id, json.dumps(recommend_list, cls=MyEncoder)))

    f.close()
    cursor.close()
    conn.close()
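A usage sketch: the CSV name matches what write_test_feature(extract_direct) produces; the model path and output file name are illustrative assumptions.

recommend("extract_direct_test.csv",          # produced by write_test_feature(extract_direct)
          "xgb_model.bin",                    # hypothetical path of a trained Booster
          "recommend_xgb_extract_direct.txt") # hypothetical output file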
Example #8
def recommend_list(route_length=3, coeffs=[]):
    """

    :param route_length:
    :return:
    """
    nodes_list = gen_nodes_list()
    matrix = init_matrix()

    conn = get_conn()
    cursor = conn.cursor()

    if route_length == 3:
        final_matrix = (matrix ** 3)
    elif route_length == 5:
        if coeffs:
            final_matrix = (matrix ** 3) + (matrix ** 5) / coeffs[0]
        else:
            final_matrix = (matrix ** 3) + (matrix ** 5) / 20
    elif route_length == 7:
        if coeffs:
            final_matrix = (matrix ** 3) + (matrix ** 5) / coeffs[0] + (matrix ** 7) / coeffs[1]
        else:
            final_matrix = (matrix ** 3) + (matrix ** 5) / 120 + (matrix ** 7) / 5040
    else:
        print("暂时没有对应的公式")
        return

    sql = "select * from public.test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    file_name = "cn_route_" + str(route_length) + ".txt"
    f = open(file_name, "w", encoding="utf-8")

    # All attractions
    a_nodes = list(get_node_id_dict().keys())
    # Record the recommendation results for every test case
    record_result = dict()
    for row in rows:
        classroute = row[2]
        user_id = VName(row[7])
        user_index = nodes_list.index(user_id)

        # Remaining attractions to recommend
        left_nodes = list(set(a_nodes) - set(classroute[:-1]))
        result_dict = dict()
        for node in left_nodes:
            node = AName(node)
            att_index = nodes_list.index(node)

            imag_coeff = final_matrix[user_index, att_index].imag

            if imag_coeff == 0.0:
                continue
            else:
                result_dict[node] = imag_coeff

        # Sort the results by score, descending
        result_dict = dict(sorted(result_dict.items(), key=lambda x: x[1], reverse=True))
        # Record the recommendation result
        f.write("%s\t%s\t%s\t%s\n" % (user_id, classroute[:-1], AName(classroute[-1]), json.dumps(result_dict)))
        record_result[user_id] = {"user_id":user_id, "classroute":classroute[-1], "answer":AName(classroute[-1]),
                                  "recommend":result_dict}

    f.close()
    cursor.close()
    conn.close()
    return record_result
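A usage sketch (the coefficient value is illustrative): recommend over length-5 routes and print the top five suggestions per user.

results = recommend_list(route_length=5, coeffs=[20])
for uid, rec in results.items():
    print(uid, rec["answer"], list(rec["recommend"].keys())[:5])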