Exemple #1
0
def getDataFromDB():
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    # sql_dr = """SELECT * FROM course_dr5000_changed"""
    sql_dr = DataBaseQuery["online_course_dr_changed"]
    # sql_course = "select id , system_course_id ,course_name from course_info"
    # sql_course = """select id, name from course5000"""
    sql_course = DataBaseQuery["course_info"]
    # sql_user = """select user_id from user_basic_info"""
    # sql_user = """select id from account5000"""
    sql_user = DataBaseQuery["user_id"]

    result_dr = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                               sql=sql_dr)
    result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_course)
    dbHandle.changeCloseFlag()
    result_user = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                 sql=sql_user)

    print(
        "len(result_dr) = {}, len(result_user) = {},\
          len(result_course) = {}".format(len(result_dr), len(result_user),
                                          len(result_course)))

    return result_dr, result_course, result_user
Exemple #2
0
def loadData():
    if FSLflag == False:
        all_data = pd.read_csv(
            '../DGL/ml-100k/u.data',
            sep='\t',
            header=None,
            names=['user_id', 'item_id', 'rating', 'timestamp'])
        # test_data = pd.read_csv('../DGL/ml-100k/ua.test', sep='\t', header=None,
        #                         names=['user_id', 'item_id', 'rating', 'timestamp'])
        user_data = pd.read_csv('../DGL/ml-100k/u.user',
                                sep='|',
                                header=None,
                                encoding='latin1')
        item_data = pd.read_csv('../DGL/ml-100k/u.item',
                                sep='|',
                                header=None,
                                encoding='latin1')
        # test_data = test_data[test_data['user_id'].isin(train_data['user_id']) &
        #                       test_data['item_id'].isin(train_data['item_id'])]
        # u_data = user_data[[0,1,2,3,4]]
        # u_data.columns = ['user_id','age','gender','occupation','zip_code']
        # i_data = item_data
        # i_data.columns = ['item_id','title','release_date','video_release_date','IMDb_URL','unknown','Action','Adventure','Animation','Children',
        #                   'Comedy','Crime','Documentary','Drama','Fantasy','Film-Noir','Horror','Musical','Mystery','Romance','Sci-Fi','Thriller',
        #                   'War','Western']

        return all_data, user_data, item_data

    else:
        dbHandle = DatabaseIo()
        if not dbHandle:
            return None
        sql_dr = DataBaseQuery["course_dr"]
        sql_course = DataBaseQuery["course_info"]
        sql_user = DataBaseQuery["user_id"]
        sql_classify = DataBaseQuery["classify_info"]
        result_dr = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_dr)
        result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                       sql=sql_course)
        result_classify = dbHandle.doSql(
            execType=DataBaseOperateType.SearchMany, sql=sql_classify)
        dbHandle.changeCloseFlag()
        result_user = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                     sql=sql_user)
        drList = formatDataByType(SetType.SetType_List, result_dr)
        all_data = pd.DataFrame(list(drList))
        all_data.columns = ['user_id', 'item_id', 'rating']
        user_data = pd.DataFrame(list(result_user))
        item_data = pd.DataFrame(list(result_course))

        classify_data = pd.DataFrame(list(result_classify))
        classify_data.columns = [
            'id', 'course_name', 'classify_name', 'classify_id'
        ]

        return all_data, user_data, item_data, classify_data
def get_couse_info():
    # logging.warning(u"运行日志:获取课程信息")
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None
    # sql_course = "select id, name from course5000"
    sql_course = DataBaseQuery["course_info"]
    result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_course)
    dbHandle.changeCloseFlag()
    courseList = formatDataByType(SetType.SetType_List, result_course)
    return courseList
def get_all_users():
    # logging.warning(u"运行日志:获取所有用户的id")
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    # sql_user = "******"
    sql_user = DataBaseQuery["user_id"]
    result_user = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                 sql=sql_user)
    dbHandle.changeCloseFlag()
    return result_user
def get_couse_info_with_video():
    # logging.warning(u"运行日志:获取课程信息")
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None
    # 先用图片替代模拟视频
    # sql_course = "select id, name, image, description from course5000"
    sql_course = DataBaseQuery["interface_video"]
    result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_course)
    dbHandle.changeCloseFlag()
    courseList = formatDataByType(SetType.SetType_List, result_course)
    return courseList
def get_course_info_with_image():
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None
    # sql_course = "select id, name, image, description from course5000"
    sql_course = DataBaseQuery["interface_image"]
    # sql_classify = "select id, course_name, classify_name from course_classify5000"
    sql_classify = DataBaseQuery["classify_info"]
    result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_course)
    result_course_classify = dbHandle.doSql(
        execType=DataBaseOperateType.SearchMany, sql=sql_classify)
    dbHandle.changeCloseFlag()
    courseList = formatDataByType(SetType.SetType_List, result_course)
    classifyList = formatDataByType(SetType.SetType_List,
                                    result_course_classify)
    return courseList, classifyList
def high_score_courses():
    logging.warning(u"运行日志:获取高评分课程列表")

    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    sql_clean_highscorecourse = DataBaseQuery["highscore_clean_course"]
    #sql_clean_highscorecourse = 'truncate table  high_score_course;'

    sql_insert_highscorecourse = DataBaseQuery["highscore_insert_course"]
    #sql_insert_highscorecourse = "insert into high_score_course(course_id, score) values (%s, %s)"

    courses = get_user_course()
    course_score_sum = {}
    highscoreCourseList = list()
    for row in courses:
        if int(row[1]) not in course_score_sum:
            # course_score_sum[int(row[1])] = [int(row[3]), 1]
            course_score_sum[row[1]] = [int(row[3]), 1]
        else:
            # course_score_sum[int(row[1])] = [(course_score_sum[int(row[1])][0] + int(row[3])), (course_score_sum[int(row[1])][1] + 1)]
            course_score_sum[row[1]] = [
                (course_score_sum[row[1]][0] + int(row[3])),
                (course_score_sum[row[1]][1] + 1)
            ]
    course_score = {}
    for key in course_score_sum:
        course_score[key] = course_score_sum[key][0] / course_score_sum[key][1]
    result = sort_by_value(course_score)
    # print(result)
    for row in result:
        course_id, score = row
        highscoreCourseList.append(tuple([course_id, score]))

    insertTuple = tuple(highscoreCourseList)
    dbHandle.doSql(DataBaseOperateType.InsertOne, sql_clean_highscorecourse)
    dbHandle.changeCloseFlag()
    dbHandle.doSql(DataBaseOperateType.InsertMany, sql_insert_highscorecourse,
                   insertTuple)

    print("highscorecourse success")
    # result_dataframe = DataFrame(result)
    # # result_dataframe.to_csv('highScore.csv', index=None, header=None)
    # result_dataframe.to_csv('file_saved/highScore.csv', index=None, header=None)
    return result
def popular_courses():
    logging.warning(u"运行日志:获取热门课程列表")

    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    sql_clean_popularcourse = DataBaseQuery["popular_clean_course"]
    #sql_clean_popularcourse = 'truncate table popular_course;'

    sql_insert_popularcourse = DataBaseQuery["popular_insert_course"]
    #sql_insert_popularcourse = "insert into popular_course(course_id, course_click_times) values (%s, %s)"

    courses = get_user_course()
    course_click_times = {}
    popularCourseList = list()
    for row in courses:
        # if int(row[1]) not in course_click_times:
        #     course_click_times[int(row[1])] = int(row[2])
        if row[1] not in course_click_times:
            course_click_times[row[1]] = int(row[2])
        else:
            # course_click_times[int(row[1])] = course_click_times[int(row[1])] + int(row[2])
            course_click_times[row[1]] = course_click_times[row[1]] + int(
                row[2])
    result = sort_by_value(course_click_times)
    # result_dataframe = DataFrame(result)
    # print(result_dataframe)
    for row in result:
        course_id, click_times = row
        popularCourseList.append(tuple([course_id, click_times]))

    insertTuple = tuple(popularCourseList)
    # print(insertTuple)
    dbHandle.doSql(DataBaseOperateType.InsertOne, sql_clean_popularcourse)
    dbHandle.changeCloseFlag()
    dbHandle.doSql(DataBaseOperateType.InsertMany, sql_insert_popularcourse,
                   insertTuple)
    print("popularcourse success")
    # result_dataframe = DataFrame(result)
    # # result_dataframe.to_csv('popular.csv', index=None, header=None)
    # result_dataframe.to_csv('file_saved/popular.csv', index=None, header=None)
    # print result_dataframe
    return result
def updateCourseDr():
    logging.warning(u"运行日志:开始数据预处理")
    print("run coursePredeal...")

    # sql_select_course = '''select account_id,course_id,
    #         (CASE
    #         WHEN duration>10 THEN 1
    #         ELSE 0 END)as time,
    #         CASE collect_status
    #         WHEN 'YES' THEN 1 ELSE 0 END AS collect,
    #         CASE commit_status
    #         WHEN 'YES' THEN 1.5 ELSE 0 END AS commit_status,
    #         CASE
    #         WHEN score>50 THEN 1 ELSE 0 END AS score
    #         FROM account_course5000'''
    sql_select_course = DataBaseQuery["predeal_user_course"]
    # sql_insert_course_dr = '''INSERT INTO course_dr(user_id, course_index, recommend_value)
    #                     VALUES (%s, %s, %s)'''
    sql_insert_course_dr = DataBaseQuery["predeal_insert_course_dr"]
    # sql_clean_course_dr = 'truncate table course_dr;'
    sql_clean_course_dr = DataBaseQuery["predeal_clean_course_dr"]

    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    result = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                            sql=sql_select_course)

    userCourseList = list()
    for row in result:
        user_id, course_id, time, collect, commit, score = row
        count = (3 * time + 2 * commit + collect + score) / 8
        userCourseList.append(tuple([user_id, course_id, count]))

    insertTuple = tuple(userCourseList)

    dbHandle.doSql(DataBaseOperateType.InsertOne, sql_clean_course_dr)
    dbHandle.changeCloseFlag()
    dbHandle.doSql(DataBaseOperateType.InsertMany, sql_insert_course_dr,
                   insertTuple)
    print("coursePredeal success")
def get_user_course():
    # logging.warning(u"运行日志:获取用户与课程的交互数据")
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    # sql_user_course = "select account_id, course_id, click_times, score from account_course5000"
    sql_user_course = DataBaseQuery["user_course"]
    result_user_course = dbHandle.doSql(
        execType=DataBaseOperateType.SearchMany, sql=sql_user_course)

    # sql_user_course_changed = "select account_id, course_id, click_times, score from account_course5000"
    sql_user_course_changed = DataBaseQuery["user_course_changed"]
    result_user_course_changed = dbHandle.doSql(
        execType=DataBaseOperateType.SearchMany, sql=sql_user_course_changed)
    dbHandle.changeCloseFlag()

    result_user_course = list(result_user_course) + list(
        result_user_course_changed)

    return result_user_course
def getDataFromDB():
    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    sql_dr = """select *          from course_dr"""
    sql_course = "select id , system_course_id ,course_name from course_info"
    sql_user = """select user_id from user_basic_info"""

    result_dr = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                               sql=sql_dr)
    result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_course)
    dbHandle.changeCloseFlag()
    result_user = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                 sql=sql_user)

    print("len(result_dr) = {}, len(result_user) = {},\
          len(result_course) = {}".format(len(result_dr), len(result_user),
                                          len(result_course)))

    return result_dr, result_course, result_user
def updateCourseDrChanged(d):
    logging.warning(u"运行日志:在线模块数据处理")

    # sql_select_course = '''select user_id,course_id,
    #     (CASE
    #     WHEN learning_time>10 THEN 1
    #     ELSE 0 END)as time,
    #     CASE collect_status
    #     WHEN 'YES' THEN 1 ELSE 0 END AS collect,
    #     CASE commit_status
    #     WHEN 'YES' THEN 1.5 ELSE 0 END AS commit_status,
    #     CASE
    #     WHEN score>50 THEN 1 ELSE 0 END AS score
    #     FROM user_course_changed
    #     WHERE learning_time > 10
    #     AND UNIX_TIMESTAMP(update_time) > UNIX_TIMESTAMP('{0}')'''.format(d)
    #
    # sql_insert_course_dr_changed = '''INSERT INTO course_dr_changed(user_id, course_id, recommend_value)
    #                     VALUES (%s, %s, %s)'''
    # sql_clean_course_dr_changed = 'truncate table course_dr_changed;'

    # sql_select_course = '''select account_id,course_id,
    #         (CASE
    #         WHEN duration>10 THEN 1
    #         ELSE 0 END)as time,
    #         CASE collect_status
    #         WHEN 'YES' THEN 1 ELSE 0 END AS collect,
    #         CASE commit_status
    #         WHEN 'YES' THEN 1.5 ELSE 0 END AS commit_status,
    #         CASE
    #         WHEN score>50 THEN 1 ELSE 0 END AS score
    #         FROM account_course5000
    #         WHERE duration > 10
    #         AND UNIX_TIMESTAMP(update_time) > UNIX_TIMESTAMP('{0}')'''.format(d)
    sql_select_course = DataBaseQuery["online_predeal_select_course"].format(d)

    sql_insert_course_dr = '''INSERT INTO course_dr5000_changed(user_id, course_index, recommend_value)
                        VALUES (%s, %s, %s)'''
    # sql_insert_course_dr = DataBaseQuery["online_predeal_insert_course_dr"]

    sql_clean_course_dr = 'truncate table course_dr5000_changed;'
    # sql_clean_course_dr = DataBaseQuery["online_clean_course_dr"]

    dbHandle = DatabaseIo()
    if not dbHandle:
        return None

    result = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                            sql=sql_select_course)

    userCourseList = list()
    for row in result:
        user_id, course_id, time, collect, commit, score = row
        count = (3 * time + 2 * commit + collect + score) / 8
        userCourseList.append(tuple([user_id, course_id, count]))

    insertTuple = tuple(userCourseList)

    dbHandle.doSql(DataBaseOperateType.InsertOne, sql_clean_course_dr)
    dbHandle.changeCloseFlag()
    dbHandle.doSql(DataBaseOperateType.InsertMany, sql_insert_course_dr,
                   insertTuple)
Exemple #13
0
def getdata(FSLflag):
    print("get data")
    if FSLflag == False:
        train_data = pd.read_csv(
            '../DGL/ml-100k/ua.base',
            sep='\t',
            header=None,
            names=['user_id', 'item_id', 'rating', 'timestamp'])
        test_data = pd.read_csv(
            '../DGL/ml-100k/ua.test',
            sep='\t',
            header=None,
            names=['user_id', 'item_id', 'rating', 'timestamp'])
        user_data = pd.read_csv('../DGL/ml-100k/u.user',
                                sep='|',
                                header=None,
                                encoding='latin1')
        item_data = pd.read_csv('../DGL/ml-100k/u.item',
                                sep='|',
                                header=None,
                                encoding='latin1')
        test_data = test_data[
            test_data['user_id'].isin(train_data['user_id'])
            & test_data['item_id'].isin(train_data['item_id'])]

        train_data = train_data.values.tolist()
        test_data = test_data.values.tolist()
        user_data = user_data.values.tolist()
        item_data = item_data.values.tolist()
        # print item_data
        # print item_data
        return train_data, test_data, user_data, item_data
    else:
        dbHandle = DatabaseIo()
        if not dbHandle:
            return None
        sql_dr = DataBaseQuery["course_dr"]
        sql_course = DataBaseQuery["course_info"]
        sql_user = DataBaseQuery["user_id"]
        sql_classify = DataBaseQuery["classify_info"]
        result_dr = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                   sql=sql_dr)
        result_course = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                       sql=sql_course)
        result_classify = dbHandle.doSql(
            execType=DataBaseOperateType.SearchMany, sql=sql_classify)
        dbHandle.changeCloseFlag()
        result_user = dbHandle.doSql(execType=DataBaseOperateType.SearchMany,
                                     sql=sql_user)
        drList = formatDataByType(SetType.SetType_List, result_dr)
        user_data = formatDataByType(SetType.SetType_Set, result_user)
        item_data = formatDataByType(SetType.SetType_List, result_course)
        classify_data = formatDataByType(SetType.SetType_List, result_classify)
        dr_length = len(drList)
        testIDs = random.sample(range(1, dr_length), int(dr_length / 10))
        data = pd.DataFrame(drList)
        train_data = []
        test_data = []
        for index, row in data.iterrows():
            if (index + 1) in testIDs:
                test_data.append(row)
            else:
                train_data.append(row)
        return train_data, test_data, user_data, item_data, classify_data