Example #1
0
def dataPreprocessiong():
    """Load the raw tables and shape them for the downstream model code.

    The three result sets returned by ``getDataFromDB`` are converted with
    ``formatDataByType``; ``makeDic`` then yields a pair of dictionaries for
    users and for courses (presumably id -> index and its inverse - confirm
    against ``makeDic``).  For every record two rows are produced:

    * a rating row ``[user_mdic[record[0]] + 1, record[1], record[2] * 5]``
    * a "learned" row ``[record[0], record[1], get_keys(record[1], courseList)]``

    When the module-level ``drawRoc`` flag is truthy the rating frame is also
    handed to ``saveBgInputMartix``.

    Returns:
        ``(data, learned, course_mdic, course_mdicr, user_mdic, user_mdicr,
        dr_length, course_length, user_length, courseList)`` where ``data``
        is a ``pandas.DataFrame`` built from the rating rows.
    """
    logging.warning(u"运行日志:将从数据库中读出的数据进行数据处理")
    raw_dr, raw_course, raw_user = getDataFromDB()

    drList = formatDataByType(SetType.SetType_List, raw_dr)
    userList = formatDataByType(SetType.SetType_Set, raw_user)
    courseList = formatDataByType(SetType.SetType_List, raw_course)

    dr_length, course_length, user_length = (
        len(drList), len(courseList), len(userList))

    user_mdic, user_mdicr = makeDic(userList)
    course_mdic, course_mdicr = makeDic(courseList)

    rating_rows = []
    learned_rows = []
    for record in drList:
        # Build both rows before appending so the two lists stay in lockstep.
        rating_row = [user_mdic[record[0]] + 1, record[1], record[2] * 5]
        learned_row = [record[0], record[1],
                       get_keys(record[1], courseList)]
        rating_rows.append(rating_row)
        learned_rows.append(learned_row)

    data = pd.DataFrame(rating_rows)

    if drawRoc:
        saveBgInputMartix(data, user_mdicr)

    return (data, learned_rows, course_mdic, course_mdicr, user_mdic,
            user_mdicr, dr_length, course_length, user_length, courseList)
Example #2
0
def dataPreprocessiong():
    """Turn the raw database tables into the structures used for training.

    Result sets from ``getDataFromDB`` are converted with
    ``formatDataByType`` and ``makeDic`` supplies a dictionary pair for users
    and another for courses (presumably id -> index plus the inverse mapping;
    verify against ``makeDic``).  Each record contributes a rating row
    ``[user_mdic[rec[0]] + 1, rec[1], rec[2] * 5]`` and a "learned" row
    ``[rec[0], rec[1], get_keys(rec[1], courseList)]``.

    Returns:
        ``(data, learned, course_mdic, course_mdicr, user_mdic, user_mdicr,
        dr_length, course_length, user_length, courseList)``; ``data`` is a
        ``pandas.DataFrame`` of the rating rows.
    """
    raw_dr, raw_course, raw_user = getDataFromDB()

    drList = formatDataByType(SetType.SetType_List, raw_dr)
    userList = formatDataByType(SetType.SetType_Set, raw_user)
    courseList = formatDataByType(SetType.SetType_List, raw_course)

    dr_length, course_length, user_length = (
        len(drList), len(courseList), len(userList))

    user_mdic, user_mdicr = makeDic(userList)
    course_mdic, course_mdicr = makeDic(courseList)

    rating_rows = []
    learned_rows = []
    for rec in drList:
        # Both rows are computed first, then appended together.
        rating_row = [user_mdic[rec[0]] + 1, rec[1], rec[2] * 5]
        learned_row = [rec[0], rec[1], get_keys(rec[1], courseList)]
        rating_rows.append(rating_row)
        learned_rows.append(learned_row)

    data = pd.DataFrame(rating_rows)

    return (data, learned_rows, course_mdic, course_mdicr, user_mdic,
            user_mdicr, dr_length, course_length, user_length, courseList)
def get_course_info_with_image():
    """Fetch the course rows (image query) and the classification rows.

    Runs the ``"interface_image"`` and ``"classify_info"`` entries of
    ``DataBaseQuery`` through a single ``DatabaseIo`` handle and converts
    each result set with ``formatDataByType(SetType.SetType_List, ...)``.

    Returns:
        ``(courseList, classifyList)``, or ``None`` when the ``DatabaseIo``
        handle is falsy.
    """
    db = DatabaseIo()
    if not db:
        return None

    # The SQL text itself lives in the DataBaseQuery table.
    course_sql = DataBaseQuery["interface_image"]
    classify_sql = DataBaseQuery["classify_info"]

    search_many = DataBaseOperateType.SearchMany
    course_rows = db.doSql(execType=search_many, sql=course_sql)
    classify_rows = db.doSql(execType=search_many, sql=classify_sql)
    db.changeCloseFlag()

    return (
        formatDataByType(SetType.SetType_List, course_rows),
        formatDataByType(SetType.SetType_List, classify_rows),
    )
def coll_main():
    """Run the user-based CF recommender once and print basic statistics.

    Data is fetched through ``cR.getDataFromDB()``, converted with
    ``formatDataByType`` and re-indexed with the dictionaries returned by
    ``makeDic``.  The resulting ``[user index, course index, 5 * score]``
    rows are passed to ``CF`` together with the raw course result set, after
    which ``recommendByUser()`` is invoked.  The sizes of ``demo.trans_data``
    and ``demo.test_data`` and the elapsed time are printed.

    Returns:
        None.
    """
    # time.clock() was removed in Python 3.8.  Use perf_counter where it
    # exists and fall back to clock only on interpreters that lack it.
    # NOTE: perf_counter measures wall-clock time; on Unix the old clock()
    # reported CPU time.
    timer = getattr(time, "perf_counter", None) or time.clock
    start = timer()

    # Fetch the data
    result_dr, result_course, result_user = cR.getDataFromDB()

    # Records read from course_dr, stored as a list
    dr_list = formatDataByType(SetType.SetType_List, result_dr)
    # User ids read from user_basic_info
    user_basic_info_list = formatDataByType(SetType.SetType_Set, result_user)
    # Course rows read from course_info, stored as a list
    course_info_list = formatDataByType(SetType.SetType_List, result_course)

    # Lengths are taken from the raw result sets, as in the original code.
    course_length = len(result_course)
    user_length = len(result_user)
    range_length = len(result_dr)
    movies = result_course

    # Dictionaries mapping course ids / user ids to index numbers; the
    # second value returned by makeDic is not needed here.
    course_mdic, _ = makeDic(course_info_list)
    user_mdic, _ = makeDic(user_basic_info_list)

    # Iteration is bounded by the raw result length (assumes
    # formatDataByType keeps one entry per raw record - TODO confirm).
    ratings = [
        [
            user_mdic[dr_list[j][0]],
            course_mdic[dr_list[j][1]],
            5 * dr_list[j][2],
        ]
        for j in range(range_length)
    ]

    demo = CF(movies, ratings, course_length, user_length, k=10)
    demo.recommendByUser()
    print("训练集的数据为%d条" % (len(demo.trans_data)))
    print("测试集的数据为%d条" % (len(demo.test_data)))
    end = timer()
    print("耗费时间: %f s" % (end - start))
Example #5
0
def loadData():
    """Load rating, user and item data from CSV files or from the database.

    The module-level ``FSLflag`` selects the source:

    * ``FSLflag == False``: the three ``ml-100k`` files under ``../DGL`` are
      read with ``pandas.read_csv`` and a 3-tuple
      ``(all_data, user_data, item_data)`` is returned.
    * otherwise: four queries from ``DataBaseQuery`` are executed through
      ``DatabaseIo`` and a 4-tuple
      ``(all_data, user_data, item_data, classify_data)`` of DataFrames is
      returned, or ``None`` when the ``DatabaseIo`` handle is falsy.
    """
    if FSLflag == False:
        rating_columns = ['user_id', 'item_id', 'rating', 'timestamp']
        ratings = pd.read_csv('../DGL/ml-100k/u.data',
                              sep='\t',
                              header=None,
                              names=rating_columns)
        users = pd.read_csv('../DGL/ml-100k/u.user',
                            sep='|',
                            header=None,
                            encoding='latin1')
        items = pd.read_csv('../DGL/ml-100k/u.item',
                            sep='|',
                            header=None,
                            encoding='latin1')
        return ratings, users, items

    db = DatabaseIo()
    if not db:
        return None

    dr_sql = DataBaseQuery["course_dr"]
    course_sql = DataBaseQuery["course_info"]
    user_sql = DataBaseQuery["user_id"]
    classify_sql = DataBaseQuery["classify_info"]

    search_many = DataBaseOperateType.SearchMany
    dr_rows = db.doSql(execType=search_many, sql=dr_sql)
    course_rows = db.doSql(execType=search_many, sql=course_sql)
    classify_rows = db.doSql(execType=search_many, sql=classify_sql)
    # NOTE(review): the close flag is changed *before* the final query;
    # that ordering is kept as-is - confirm against DatabaseIo.
    db.changeCloseFlag()
    user_rows = db.doSql(execType=search_many, sql=user_sql)

    formatted_dr = formatDataByType(SetType.SetType_List, dr_rows)
    ratings = pd.DataFrame(list(formatted_dr))
    ratings.columns = ['user_id', 'item_id', 'rating']

    # User and course frames are built from the raw (unformatted) rows.
    users = pd.DataFrame(list(user_rows))
    items = pd.DataFrame(list(course_rows))

    classify = pd.DataFrame(list(classify_rows))
    classify.columns = ['id', 'course_name', 'classify_name', 'classify_id']

    return ratings, users, items, classify
def get_couse_info():
    """Return the rows of the ``"course_info"`` query as a formatted list.

    The query is executed through ``DatabaseIo`` and the result set is
    converted with ``formatDataByType(SetType.SetType_List, ...)``.

    Returns:
        The converted course list, or ``None`` when the ``DatabaseIo``
        handle is falsy.
    """
    db = DatabaseIo()
    if not db:
        return None

    # The SQL text itself lives in the DataBaseQuery table.
    query = DataBaseQuery["course_info"]
    rows = db.doSql(execType=DataBaseOperateType.SearchMany, sql=query)
    db.changeCloseFlag()

    return formatDataByType(SetType.SetType_List, rows)
def get_couse_info_with_video():
    """Return the rows of the ``"interface_video"`` query as a formatted list.

    The query is executed through ``DatabaseIo`` and the result set is
    converted with ``formatDataByType(SetType.SetType_List, ...)``.

    Returns:
        The converted course list, or ``None`` when the ``DatabaseIo``
        handle is falsy.
    """
    db = DatabaseIo()
    if not db:
        return None

    # Original author's note: images stand in for videos for now.
    query = DataBaseQuery["interface_video"]
    rows = db.doSql(execType=DataBaseOperateType.SearchMany, sql=query)
    db.changeCloseFlag()

    return formatDataByType(SetType.SetType_List, rows)
Example #8
0
def getdata(FSLflag):
    """Load train/test splits plus user and item data.

    Args:
        FSLflag: When it compares equal to ``False`` the ``ml-100k`` CSV
            files under ``../DGL`` are used; otherwise the data comes from
            the database via ``DatabaseIo``.

    Returns:
        CSV branch: ``(train_data, test_data, user_data, item_data)`` as
        nested lists; test rows are kept only if both their user and item
        also occur in the training file.

        Database branch: ``(train_data, test_data, user_data, item_data,
        classify_data)`` where the train/test entries are lists of DataFrame
        rows and roughly one tenth of the records is sampled at random into
        the test split; ``None`` when the ``DatabaseIo`` handle is falsy.
    """
    print("get data")
    if FSLflag == False:
        rating_columns = ['user_id', 'item_id', 'rating', 'timestamp']
        train_data = pd.read_csv('../DGL/ml-100k/ua.base',
                                 sep='\t',
                                 header=None,
                                 names=rating_columns)
        test_data = pd.read_csv('../DGL/ml-100k/ua.test',
                                sep='\t',
                                header=None,
                                names=rating_columns)
        user_data = pd.read_csv('../DGL/ml-100k/u.user',
                                sep='|',
                                header=None,
                                encoding='latin1')
        item_data = pd.read_csv('../DGL/ml-100k/u.item',
                                sep='|',
                                header=None,
                                encoding='latin1')
        # Keep only test rows whose user and item were seen during training.
        test_data = test_data[
            test_data['user_id'].isin(train_data['user_id'])
            & test_data['item_id'].isin(train_data['item_id'])]

        train_data = train_data.values.tolist()
        test_data = test_data.values.tolist()
        user_data = user_data.values.tolist()
        item_data = item_data.values.tolist()
        return train_data, test_data, user_data, item_data

    dbHandle = DatabaseIo()
    if not dbHandle:
        return None
    sql_dr = DataBaseQuery["course_dr"]
    sql_course = DataBaseQuery["course_info"]
    sql_user = DataBaseQuery["user_id"]
    sql_classify = DataBaseQuery["classify_info"]
    result_dr = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                               sql=sql_dr)
    result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_course)
    result_classify = dbHandle.doSql(
        execType=DataBaseOperateType.SearchMany, sql=sql_classify)
    # NOTE(review): the close flag is changed *before* the final query;
    # that ordering is kept as-is - confirm against DatabaseIo.
    dbHandle.changeCloseFlag()
    result_user = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                 sql=sql_user)
    drList = formatDataByType(SetType.SetType_List, result_dr)
    user_data = formatDataByType(SetType.SetType_Set, result_user)
    item_data = formatDataByType(SetType.SetType_List, result_course)
    classify_data = formatDataByType(SetType.SetType_List, result_classify)

    dr_length = len(drList)
    # A set gives O(1) membership tests in the loop below; the ids drawn
    # (and the random state consumed) are exactly the same as with a list.
    # NOTE(review): ids are drawn from 1..dr_length-1, so the last record
    # (index + 1 == dr_length) can never land in the test split.  Left
    # unchanged so that seeded splits stay reproducible.
    testIDs = set(
        random.sample(range(1, dr_length), int(dr_length / 10)))

    data = pd.DataFrame(drList)
    train_data = []
    test_data = []
    for index, row in data.iterrows():
        if (index + 1) in testIDs:
            test_data.append(row)
        else:
            train_data.append(row)
    return train_data, test_data, user_data, item_data, classify_data
Example #9
0
def transToMatrix(p):
    """Convert ``p`` with ``formatDataByType`` using ``SetType.SetType_Set``.

    Returns:
        Whatever ``formatDataByType`` produces for that set type.
    """
    target_type = SetType.SetType_Set
    return formatDataByType(target_type, p)
def dataPreprocessiong():
    """Prepare the rating data and export the bipartite-graph inputs.

    Steps:

    1. Load the three result sets via ``getDataFromDB``, convert them with
       ``formatDataByType`` and build dictionary pairs with ``makeDic``.
    2. Build one rating row ``[user_mdic[dr[0]] + 1, dr[1], dr[2] * 5]`` and
       one "learned" row ``[dr[0], dr[1], get_keys(dr[1], courseList)]`` per
       record.
    3. De-duplicate the rating frame on (column 0, column 1) and drop every
       row whose course (column 1) is missing from the ``cid`` column of
       ``roc/toGcn2.csv``.
    4. Write the remaining rows to ``gcn/bg_input.csv`` as
       ``user,course,rating`` lines, translating column 0 back through
       ``user_mdicr``.
    5. Build the 0/1 matrix ``realGraph`` (rows: courses, columns: users),
       save it to ``roc/realGraph.txt`` and save the two id -> 0-based index
       dictionaries as ``.npy`` files.

    Returns:
        ``(data, learned, course_mdic, course_mdicr, user_mdic, user_mdicr,
        dr_length, course_length, user_length, courseList)`` where ``data``
        is the filtered rating frame.
    """
    result_dr, result_course, result_user = getDataFromDB()

    drList = formatDataByType(SetType.SetType_List, result_dr)
    userList = formatDataByType(SetType.SetType_Set, result_user)
    courseList = formatDataByType(SetType.SetType_List, result_course)

    dr_length = len(drList)
    course_length = len(courseList)
    user_length = len(userList)

    user_mdic, user_mdicr = makeDic(userList)
    course_mdic, course_mdicr = makeDic(courseList)

    result, learned = [], []
    for dr in drList:
        rating_row = [user_mdic[dr[0]] + 1, dr[1], dr[2] * 5]
        learned_row = [dr[0], dr[1], get_keys(dr[1], courseList)]
        result.append(rating_row)
        learned.append(learned_row)

    data = pd.DataFrame(result)

    # Course ids present in the reference results.  A set makes the
    # membership test O(1), and duplicates in the CSV no longer matter.
    dc = pd.read_csv("roc/toGcn2.csv")
    known_courses = set(dc['cid'])

    # Column 0 holds the user, column 1 the course.
    data.drop_duplicates(subset=[0, 1], keep='first', inplace=True)
    data.reset_index(inplace=True)

    # Drop rows whose course is not in the reference results; all labels
    # are collected first so a single drop() call is enough.
    missing = [
        label for label, cid in zip(data.index, data[1])
        if cid not in known_courses
    ]
    data.drop(missing, axis=0, inplace=True)
    data.drop('index', axis=1, inplace=True)

    # Save the bipartite-graph input.  The context manager guarantees the
    # file is closed even if a lookup or write fails part-way through.
    with codecs.open("gcn/bg_input.csv", mode="w",
                     encoding='utf-8') as myfile:
        for row in data.values.tolist():
            # Column 0 stores "index + 1"; map it back through user_mdicr.
            user_id = user_mdicr[int(row[0]) - 1]
            fields = (str(user_id), str(row[1]), str(int(row[2])))
            myfile.write(",".join(fields) + "\n")

    # Build the ground-truth matrix realGraph for the bipartite-graph input,
    # together with dictionaries from the original ids to 0-based indices.
    userListNew = [user_mdicr[u - 1] for u in data[0]]
    courseListNew = list(data[1])
    userListNewTemp = sorted(set(userListNew))
    courseListNewTemp = sorted(set(courseListNew))

    user_mdic_new = {uid: i for i, uid in enumerate(userListNewTemp)}
    course_mdic_new = {cid: i for i, cid in enumerate(courseListNewTemp)}

    realGraph = nm.zeros((len(courseListNewTemp), len(userListNewTemp)))
    print(len(courseListNewTemp))
    for cid, uid in zip(courseListNew, userListNew):
        realGraph[course_mdic_new[cid], user_mdic_new[uid]] = 1

    nm.savetxt('roc/realGraph.txt', realGraph)
    # NOTE(review): nm is presumably the file's numpy alias; saving a dict
    # this way stores a pickled object array, so loading it back needs
    # allow_pickle=True.
    nm.save('user_mdic_new.npy', user_mdic_new)
    nm.save('course_mdic_new.npy', course_mdic_new)

    return data, learned, course_mdic, course_mdicr, user_mdic, user_mdicr,\
        dr_length, course_length, user_length, courseList