Exemple #1
0
def _quiz_session_id(event_type, course_learner_id, child_parent_map):
    """Return the quiz-session id opened by a problem-URL event, or None.

    The problem id is read from the "/"-separated event_type: index 6
    (with ";_" mapped back to "/") for the "_problem;_" layout, otherwise
    index 4 for the "problem+block" layout.  None is returned when that
    id has no entry in child_parent_map.  Callers must only pass event
    types that contain one of the two layout markers.
    """
    segments = event_type.split("/")
    if "_problem;_" in event_type:
        question_id = segments[6].replace(";_", "/")
    else:
        question_id = segments[4]

    if question_id not in child_parent_map:
        return None
    return "quiz_session_" + child_parent_map[question_id] + "_" + course_learner_id


def _record_quiz_interval(sessions_by_id, session_id, course_learner_id, start_time, end_time):
    """Append one start/end interval to the time_array stored for session_id."""
    session = sessions_by_id.setdefault(
        session_id, {"course_learner_id": course_learner_id, "time_array": []})
    session["time_array"].append({"start_time": start_time, "end_time": end_time})


def _scan_learner_quiz_events(event_logs, course_learner_id, child_parent_map,
                              submission_event_types, session_gap, sessions_by_id):
    """Split one learner's time-ordered events into quiz-session intervals.

    Closed intervals are stored in sessions_by_id.  Returns the time of the
    last event that closed a session, or None when no session was closed.
    """
    session_id = None
    start_time = None
    end_time = None
    final_time = None

    for event in event_logs:
        event_type = event["event_type"]
        event_time = event["event_time"]
        is_problem_url = "problem+block" in event_type or "_problem;_" in event_type

        if session_id is None:
            # Only a problem URL that maps to a known parent block opens a session.
            if is_problem_url:
                session_id = _quiz_session_id(event_type, course_learner_id, child_parent_map)
                if session_id is not None:
                    start_time = event_time
                    end_time = event_time
            continue

        if is_problem_url or event_type in submission_event_types:
            if event_time <= end_time + session_gap:
                # Still the same sitting: just extend it.
                end_time = event_time
                continue

            # Idle for too long: the session ended at the previous event.
            _record_quiz_interval(sessions_by_id, session_id, course_learner_id,
                                  start_time, end_time)
            final_time = event_time

            # The current event may open a new session.  A bare submission
            # event carries no problem id, so it must leave no session open;
            # otherwise the stale start_time would leak into the next interval.
            session_id = None
            if is_problem_url:
                session_id = _quiz_session_id(event_type, course_learner_id, child_parent_map)
            if session_id is not None:
                start_time = event_time
                end_time = event_time
            else:
                start_time = None
                end_time = None
        else:
            # Any non-quiz event closes the session at that event's time.
            _record_quiz_interval(sessions_by_id, session_id, course_learner_id,
                                  start_time, event_time)
            final_time = event_time
            session_id = None
            start_time = None
            end_time = None

    return final_time


def _merge_close_intervals(time_array, max_gap):
    """Merge consecutive intervals that start no later than max_gap after the running end."""
    merged = []
    start_time = None
    end_time = None

    for interval in time_array:
        if start_time is None:
            start_time = interval["start_time"]
            end_time = interval["end_time"]
        elif interval["start_time"] > end_time + max_gap:
            merged.append({"start_time": start_time, "end_time": end_time})
            start_time = interval["start_time"]
            end_time = interval["end_time"]
        else:
            end_time = interval["end_time"]

    # Always flush the interval still being built, including a trailing one
    # that was separated from its predecessor by more than max_gap.
    if start_time is not None:
        merged.append({"start_time": start_time, "end_time": end_time})
    return merged


def quiz_sessions(metadata_path, log_path, cursor):
    """Derive quiz sessions from the daily event logs and insert them via cursor.

    A session opens on a problem-URL event whose problem id is a key of the
    course's child_parent_map.  While it is open, problem-URL events and the
    submission events listed below extend it as long as they arrive within
    30 minutes of the previous one; a later one closes it at the previous
    event's time, and any other event closes it at that event's own time.
    Intervals recorded under the same session id are merged when they are at
    most 30 minutes apart.  Only intervals with start < end and a duration
    above 5 seconds are inserted.

    Args:
        metadata_path: passed to ExtractCourseInformation; the returned map
            must provide "start_date", "end_date" and "child_parent_map".
        log_path: directory listed with os.listdir.  A file is processed for
            a date when that date string occurs in its name; every line of
            the file is parsed as one JSON event.
        cursor: object whose execute(sql) is called once per session row.
            No commit is issued here.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    child_parent_map = course_metadata_map["child_parent_map"]

    submission_event_types = frozenset([
        # Problem check
        "problem_check",
        "save_problem_check",
        "problem_check_fail",
        "save_problem_check_fail",
        # Emitted each time a problem is checked and graded successfully
        "problem_graded",
        # Emitted when a problem is rescored
        "problem_rescore",
        "problem_rescore_fail",
        # Problem reset
        "problem_reset",
        "reset_problem",
        "reset_problem_fail",
        # Emitted after a user saves a problem
        "problem_save",
        "save_problem_fail",
        "save_problem_success",
        # Show answer
        "problem_show",
        "showanswer",
    ])

    session_gap = datetime.timedelta(hours=0.5)

    # NOTE(review): presumably dates are strings and getNextDay returns the
    # following day in the same format - confirm against its definition.
    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    carried_event_logs = {}
    sessions_by_id = {}

    while current_date != end_next_date:

        for file_name in log_files:
            if current_date not in file_name:
                continue

            print(file_name)

            # Start from the events carried over from the previous file.
            learner_event_logs = carried_event_logs
            carried_event_logs = {}

            with open(os.path.join(log_path, file_name), "r") as input_file:
                for line in input_file:
                    json_object = json.loads(line)

                    global_learner_id = json_object["context"]["user_id"]
                    event_type = str(json_object["event_type"])
                    if global_learner_id == "":
                        continue

                    course_id = json_object["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" only; anything after the
                    # seconds is discarded.
                    event_time = json_object["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(event_time, "%Y-%m-%d %H:%M:%S")

                    learner_event_logs.setdefault(course_learner_id, []).append(
                        {"event_time": event_time, "event_type": event_type})

            # For quiz session separation
            for course_learner_id, event_logs in learner_event_logs.items():

                # The cmp= argument exists only in Python 2; cmp_datetime is
                # defined outside this block (presumably ascending order).
                event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))

                final_time = _scan_learner_quiz_events(
                    event_logs, course_learner_id, child_parent_map,
                    submission_event_types, session_gap, sessions_by_id)

                # Events from the last session-closing event onward are
                # replayed with the next file.  When no session was closed in
                # this pass nothing is carried, so a session still open at
                # that point is not continued.
                if final_time is not None:
                    carried_event_logs[course_learner_id] = [
                        log for log in event_logs if log["event_time"] >= final_time]

        current_date = getNextDay(current_date)

    # To compress the session event_logs
    for session in sessions_by_id.values():
        session["time_array"] = _merge_close_intervals(session["time_array"], session_gap)

    # Database version
    for session_id in sessions_by_id.keys():
        course_learner_id = sessions_by_id[session_id]["course_learner_id"]

        for interval in sessions_by_id[session_id]["time_array"]:
            start_time = interval["start_time"]
            end_time = interval["end_time"]
            if not start_time < end_time:
                continue

            elapsed = end_time - start_time
            duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
            if duration <= 5:
                continue

            final_session_id = session_id + "_" + str(start_time) + "_" + str(end_time)

            # NOTE(review): values come from log data and are interpolated
            # straight into the statement.  Switch to a parameterized
            # execute() once the driver's paramstyle is confirmed.
            sql = "insert into quiz_sessions (session_id, course_learner_id, start_time, end_time, duration) values"
            sql += "('%s','%s','%s','%s','%s');" % (final_session_id, course_learner_id, start_time, end_time, duration)
            cursor.execute(sql)
    
    ''' 
def sessions(metadata_path, log_path, cursor):
    
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    
    current_date = course_metadata_map["start_date"]   
    end_next_date = getNextDay(course_metadata_map["end_date"])
    
    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}
    session_record = []
    
    log_files = os.listdir(log_path)
    
    while True:
        
        if current_date == end_next_date:
            break;
        
        for file in log_files:           
            
            if current_date in file:
                
                print file

                learner_all_event_logs.clear()
                learner_all_event_logs = updated_learner_all_event_logs.copy()
                updated_learner_all_event_logs.clear()
                
                # Course_learner_id set
                course_learner_id_set = set()
                for course_learner_id in learner_all_event_logs.keys():
                    course_learner_id_set.add(course_learner_id)
                
                input_file = open(log_path + file,"r")
                lines = input_file.readlines()
                        
                for line in lines:
                    
                    jsonObject = json.loads(line)
                    
                    global_learner_id = jsonObject["context"]["user_id"]
                    event_type = str(jsonObject["event_type"])
                    
                    if global_learner_id != "":
                        course_id = jsonObject["context"]["course_id"]
                        course_learner_id = course_id + "_" + str(global_learner_id)
                        
                        event_time = jsonObject["time"]
                        event_time = event_time[0:19]
                        event_time = event_time.replace("T", " ")
                        event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S")
                        
                        if course_learner_id in course_learner_id_set:
                            learner_all_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type})
                        else:
                            learner_all_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type}]
                            course_learner_id_set.add(course_learner_id)
                     
                for course_learner_id in learner_all_event_logs.keys():
                                 
                    event_logs = learner_all_event_logs[course_learner_id]
                    
                    # Sorting
                    event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))
                      
                    session_id = ""
                    start_time = ""
                    end_time = ""
                    
                    final_time = ""
                    
                    for i in range(len(event_logs)):
                        
                        if start_time == "":
                            
                            # Initialization
                            start_time = event_logs[i]["event_time"]
                            end_time = event_logs[i]["event_time"]
                            
                        else:
                            
                            if event_logs[i]["event_time"] > end_time + datetime.timedelta(hours=0.5):
                                
                                session_id = course_learner_id + "_" + str(start_time) + "_" + str(end_time)
                                duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds
                                
                                if duration > 5:
                                    array = [session_id, course_learner_id, start_time, end_time, duration]
                                    session_record.append(array)
                                    
                                final_time = event_logs[i]["event_time"]
                                    
                                # Re-initialization
                                session_id = ""
                                start_time = event_logs[i]["event_time"]
                                end_time = event_logs[i]["event_time"]
                            
                            else:
                                
                                if event_logs[i]["event_type"] == "page_close":
                                    
                                    end_time = event_logs[i]["event_time"]
                                    
                                    session_id = course_learner_id + "_" + str(start_time) + "_" + str(end_time)
                                    duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds
                                
                                    if duration > 5:
                                        array = [session_id, course_learner_id, start_time, end_time, duration]
                                        session_record.append(array)
                                        
                                    # Re-initialization
                                    session_id = ""
                                    start_time = ""
                                    end_time = ""
                                    
                                    final_time = event_logs[i]["event_time"]
                                    
                                else:
                                    
                                    end_time = event_logs[i]["event_time"]
                        
                    if final_time != "":
                        new_logs = []                
                        for log in event_logs:                 
                            if log["event_time"] >= final_time:
                                new_logs.append(log)
                                
                        updated_learner_all_event_logs[course_learner_id] = new_logs
                        
        current_date = getNextDay(current_date)
    
    # Filter duplicated records
    updated_session_record = []
    session_id_set = set()
    for array in session_record:
        session_id = array[0]
        if session_id not in session_id_set:
            session_id_set.add(session_id)
            updated_session_record.append(array)
            
    session_record = updated_session_record
    
    # Database version
    for array in session_record:
        session_id = array[0]
        course_learner_id = array[1]
        start_time = array[2]
        end_time = array[3]
        duration = array[4]
        sql = "insert into sessions(session_id, course_learner_id, start_time, end_time, duration) values"
        sql += "('%s','%s','%s','%s','%s');" % (session_id, course_learner_id, start_time, end_time, duration)
        cursor.execute(sql)
        
            
    # File version
    '''
Exemple #3
0
def video_interaction(metadata_path, log_path, cursor):
    
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    
    current_date = course_metadata_map["start_date"]   
    end_next_date = getNextDay(course_metadata_map["end_date"])
    
    video_interaction_map = {}
    
    # Video-related event types
    video_event_types = []
    video_event_types.append("play_video")
    video_event_types.append("edx.video.played")
    video_event_types.append("stop_video")
    video_event_types.append("edx.video.stopped")
    video_event_types.append("pause_video")
    video_event_types.append("edx.video.paused")
    video_event_types.append("seek_video")
    video_event_types.append("edx.video.position.changed")
    video_event_types.append("speed_change_video")
    
    # Navigation-related event types
    navigation_event_types = []
    navigation_event_types.append("page_close")
    navigation_event_types.append("seq_goto")
    navigation_event_types.append("seq_next")
    navigation_event_types.append("seq_prev")
    
    learner_video_event_logs = {}
    updated_learner_video_event_logs = {}
    
    log_files = os.listdir(log_path)
    
    while True:
        
        if current_date == end_next_date:
            break;
        
        for file in log_files:           
            if current_date in file:
                
                print file

                learner_video_event_logs.clear()
                learner_video_event_logs = updated_learner_video_event_logs.copy()
                updated_learner_video_event_logs.clear()
                
                # Course_learner_id set
                course_learner_id_set = set()
                for course_learner_id in learner_video_event_logs.keys():
                    course_learner_id_set.add(course_learner_id)
                
                input_file = open(log_path + file,"r")
                lines = input_file.readlines()
                        
                for line in lines:
                    
                    jsonObject = json.loads(line)
                    
                    if jsonObject["event_type"] in video_event_types:
                        
                        global_learner_id = jsonObject["context"]["user_id"]
                        
                        if global_learner_id != "":
                            
                            course_id = jsonObject["context"]["course_id"]
                            course_learner_id = course_id + "_" + str(global_learner_id)
                            
                            video_id = ""
                        
                            event_time = jsonObject["time"]
                            event_time = event_time[0:19]
                            event_time = event_time.replace("T", " ")
                            event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S")
                        
                            event_type = jsonObject["event_type"]
                        
                            # For seek event
                            new_time = 0
                            old_time = 0
                        
                            # For speed change event
                            new_speed = 0
                            old_speed = 0
                        
                            # This sub-condition does not exist in log data
                            # if isinstance(jsonObject["event"], dict):
                            #     video_id = jsonObject["event"]["id"]
                        
                            if isinstance(jsonObject["event"], unicode):
                                event_jsonObject = json.loads(jsonObject["event"])
                                video_id = event_jsonObject["id"]
                                
                                video_id = video_id.replace("-", "://", 1)
                                video_id = video_id.replace("-", "/")
                            
                                # For video seek event
                                if "new_time" in event_jsonObject and "old_time" in event_jsonObject:
                                    new_time = event_jsonObject["new_time"]
                                    old_time = event_jsonObject["old_time"]                                                                      
                                                                                
                                # For video speed change event           
                                if "new_speed" in event_jsonObject and "old_speed" in event_jsonObject:
                                    new_speed = event_jsonObject["new_speed"]
                                    old_speed = event_jsonObject["old_speed"]
                        
                            # To record video seek event                
                            if event_type in ["seek_video","edx.video.position.changed"]:
                                if new_time is not None and old_time is not None:
                                    if course_learner_id in course_learner_id_set:
                                        learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_time":new_time, "old_time":old_time})
                                    else:
                                        learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_time":new_time, "old_time":old_time}]
                                        course_learner_id_set.add(course_learner_id)
                                continue
                        
                            # To record video speed change event                
                            if event_type in ["speed_change_video"]:
                                if course_learner_id in course_learner_id_set:
                                    learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_speed":new_speed, "old_speed":old_speed})
                                else:
                                    learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_speed":new_speed, "old_speed":old_speed}]
                                    course_learner_id_set.add(course_learner_id)
                                continue                                                                      
                         
                            if course_learner_id in course_learner_id_set:
                                learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "video_id":video_id})
                            else:
                                learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "video_id":video_id}]
                                course_learner_id_set.add(course_learner_id)
                    
                    # For navigation events                                    
                    if jsonObject["event_type"] in navigation_event_types:
                        
                        global_learner_id = jsonObject["context"]["user_id"]
                        
                        if global_learner_id != "":
                            course_id = jsonObject["context"]["course_id"]
                            course_learner_id = course_id + "_" + str(global_learner_id)                                  
                        
                            event_time = jsonObject["time"]
                            event_time = event_time[0:19]
                            event_time = event_time.replace("T", " ")
                            event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S")
                        
                            event_type = jsonObject["event_type"]                  
                                                      
                            if course_learner_id in course_learner_id_set:
                                learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type})
                            else:
                                learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type}]
                                course_learner_id_set.add(course_learner_id)
                                  
                for course_learner_id in learner_video_event_logs.keys():
                    
                    video_id = ""
                    
                    event_logs = learner_video_event_logs[course_learner_id]
                    
                    # Sorting
                    event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))
                    
                    video_start_time = ""
                    final_time = ""
                    
                    # For video seek event
                    times_forward_seek = 0
                    duration_forward_seek = 0
                    times_backward_seek = 0
                    duration_backward_seek = 0
                    
                    # For video speed change event
                    speed_change_last_time = ""
                    times_speed_up = 0
                    times_speed_down = 0               
                    
                    # For video pause event                   
                    pause_check = False
                    pause_start_time = ""
                    duration_pause = 0                    
                                      
                    for log in event_logs:
                        
                        if log["event_type"] in ["play_video", "edx.video.played"]:
                            
                            video_start_time = log["event_time"]
                            video_id = log["video_id"]

                            if pause_check:
                                
                                duration_pause = (log["event_time"] - pause_start_time).seconds
                                video_interaction_id = course_learner_id + "_" + video_id + "_" + str(pause_start_time)
                                
                                if duration_pause > 2 and duration_pause < 600:
                                    if video_interaction_id in video_interaction_map.keys():
                                        video_interaction_map[video_interaction_id]["times_pause"] = 1                                        
                                        video_interaction_map[video_interaction_id]["duration_pause"] = duration_pause
                                
                                pause_check = False
                                                        
                            continue 
                        
                        if video_start_time != "":                                                    
                           
                            if log["event_time"] > video_start_time + datetime.timedelta(hours=0.5):
                                
                                video_start_time = ""
                                video_id = ""
                                final_time = log["event_time"]
                                
                            else:                               
                                
                                # 0. Seek
                                if log["event_type"] in ["seek_video", "edx.video.position.changed"] and video_id == log["video_id"]:                                                                       
                                    # Forward seek event
                                    if log["new_time"] > log["old_time"]:
                                        times_forward_seek += 1
                                        duration_forward_seek += log["new_time"] - log["old_time"]
                                    # Backward seek event                                    
                                    if log["new_time"] < log["old_time"]:
                                        times_backward_seek += 1
                                        duration_backward_seek += log["old_time"] - log["new_time"]
                                    continue
                                
                                # 1. Speed change
                                if log["event_type"] == "speed_change_video" and video_id == log["video_id"]:
                                    if speed_change_last_time == "":
                                        speed_change_last_time = log["event_time"]
                                        old_speed = log["old_speed"]
                                        new_speed = log["new_speed"]                                        
                                        if old_speed < new_speed:
                                            times_speed_up += 1
                                        if old_speed > new_speed:
                                            times_speed_down += 1
                                    else:
                                        if (log["event_time"] - speed_change_last_time).seconds > 10:
                                            old_speed = log["old_speed"]
                                            new_speed = log["new_speed"]                                        
                                            if old_speed < new_speed:
                                                times_speed_up += 1
                                            if old_speed > new_speed:
                                                times_speed_down += 1
                                        speed_change_last_time = log["event_time"]
                                    continue
                                
                                # 2. Pause/Stop situation
                                if log["event_type"] in ["pause_video", "edx.video.paused", "stop_video", "edx.video.stopped"] and video_id == log["video_id"]:                                    
                                    
                                    watch_duration = (log["event_time"] - video_start_time).seconds
                                    
                                    video_end_time = log["event_time"]
                                    video_interaction_id = course_learner_id + "_" + video_id + "_" + str(video_end_time)
                                 
                                    if watch_duration > 5:                                        
                                        video_interaction_map[video_interaction_id] = {"course_learner_id":course_learner_id, "video_id":video_id, "type": "video", "watch_duration":watch_duration,
                                                                        "times_forward_seek":times_forward_seek, "duration_forward_seek":duration_forward_seek, 
                                                                        "times_backward_seek":times_backward_seek, "duration_backward_seek":duration_backward_seek,
                                                                        "times_speed_up":times_speed_up, "times_speed_down":times_speed_down,
                                                                        "start_time":video_start_time, "end_time":video_end_time}

                                    if log["event_type"] in ["pause_video", "edx.video.paused"]:
                                        pause_check = True
                                        pause_start_time = video_end_time
                                    
                                    # For video seek event
                                    times_forward_seek = 0
                                    duration_forward_seek = 0
                                    times_backward_seek = 0
                                    duration_backward_seek = 0
                                    
                                    # For video speed change event
                                    speed_change_last_time = ""
                                    times_speed_up = 0
                                    times_speed_down = 0
                                    
                                    # For video general information                                  
                                    video_start_time =""
                                    video_id = ""
                                    final_time = log["event_time"]
                                    
                                    continue
                                    
                                # 3/4  Page changed/Session closed
                                if log["event_type"] in navigation_event_types:
                                    
                                    video_end_time = log["event_time"]
                                    watch_duration = (video_end_time - video_start_time).seconds                
                                    video_interaction_id = course_learner_id + "_" + video_id + "_" + str(video_end_time)
                                
                                    if watch_duration > 5:                                        
                                        video_interaction_map[video_interaction_id] = {"course_learner_id":course_learner_id, "video_id":video_id, "type": "video", "watch_duration":watch_duration,
                                                                        "times_forward_seek":times_forward_seek, "duration_forward_seek":duration_forward_seek, 
                                                                        "times_backward_seek":times_backward_seek, "duration_backward_seek":duration_backward_seek,
                                                                        "times_speed_up":times_speed_up, "times_speed_down":times_speed_down,
                                                                        "start_time":video_start_time, "end_time":video_end_time}
                                    
                                    # For video seek event
                                    times_forward_seek = 0
                                    duration_forward_seek = 0
                                    times_backward_seek = 0
                                    duration_backward_seek = 0
                                    
                                    # For video speed change event
                                    speed_change_last_time = ""
                                    times_speed_up = 0
                                    times_speed_down = 0
                                    
                                    # For video general information
                                    video_start_time = ""                                    
                                    video_id = ""
                                    final_time = log["event_time"]
                                    
                                    continue
                        
                    if final_time != "":
                        new_logs = []                
                        for log in event_logs:                 
                            if log["event_time"] > final_time:
                                new_logs.append(log)
                                
                        updated_learner_video_event_logs[course_learner_id] = new_logs                
                     
        current_date = getNextDay(current_date)
        
    video_interaction_record = []
    
    for interaction_id in video_interaction_map.keys():
        video_interaction_id = interaction_id
        course_learner_id = video_interaction_map[interaction_id]["course_learner_id"]
        video_id = video_interaction_map[interaction_id]["video_id"]
        duration = video_interaction_map[interaction_id]["watch_duration"]
        times_forward_seek = video_interaction_map[interaction_id]["times_forward_seek"]
        duration_forward_seek = video_interaction_map[interaction_id]["duration_forward_seek"]
        times_backward_seek = video_interaction_map[interaction_id]["times_backward_seek"]
        duration_backward_seek = video_interaction_map[interaction_id]["duration_backward_seek"]
        times_speed_up = video_interaction_map[interaction_id]["times_speed_up"]
        times_speed_down = video_interaction_map[interaction_id]["times_speed_down"]
        start_time = video_interaction_map[interaction_id]["start_time"]
        end_time = video_interaction_map[interaction_id]["end_time"]
        
        if "times_pause" in video_interaction_map[interaction_id]:
            times_pause = video_interaction_map[interaction_id]["watch_duration"]
            duration_pause = video_interaction_map[interaction_id]["watch_duration"]
        else:
            times_pause = 0
            duration_pause = 0
            
        array = [video_interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time]
        video_interaction_record.append(array)
    
    # Video_interaction table
    # Database version
    for array in video_interaction_record:
        interaction_id = array[0]
        course_learner_id = array[1]
        video_id = array[2]
        duration = array[3]
        times_forward_seek = array[4]
        duration_forward_seek = array[5]
        times_backward_seek = array[6]
        duration_backward_seek = array[7]
        times_speed_up = array[8]
        times_speed_down = array[9]
        times_pause = array[10]
        duration_pause = array[11]
        start_time = array[12]
        end_time = array[13]
        sql = "insert into video_interaction(interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time) values"
        sql += "('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s');" % (interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time)
        cursor.execute(sql)
        
    # File version
    '''
Exemple #4
0
def quiz_mode(metadata_path, log_path, cursor):
    """Fill the quiz_questions, submissions and assessments tables.

    Question types are derived from the course metadata.  Submissions and
    assessments are collected from the log files in log_path whose names
    contain a date between the course start date and end date (inclusive).
    Only the latest event per (learner, question) pair is kept, because
    rows are keyed by submission_id.

    Parameters:
        metadata_path: handed to ExtractCourseInformation.
        log_path: directory with the log files, one JSON event per line.
        cursor: database cursor used for all inserts.  Assumed to accept
            "%s" placeholders with a parameter tuple, as the other table
            writers in this file do.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    quiz_question_array = course_metadata_map["quiz_question_array"]
    block_type_map = course_metadata_map["block_type_map"]
    child_parent_map = course_metadata_map["child_parent_map"]

    # A question takes the type of its nearest ancestor listed in block_type_map.
    quiz_question_record = []
    for question_id in quiz_question_array:
        ancestor = child_parent_map[question_id]
        while ancestor not in block_type_map:
            ancestor = child_parent_map[ancestor]
        quiz_question_record.append([question_id, block_type_map[ancestor]])

    # Event types that count as a submission.
    submission_event_types = (
        # Problem check
        "problem_check",            # event_source: server
        "save_problem_check",
        "problem_check_fail",
        "save_problem_check_fail",
        # Emitted each time a user selects Check and the problem is graded successfully.
        "problem_graded",
        # Emitted when a problem is (un)successfully rescored.
        "problem_rescore",
        "problem_rescore_fail",
        "problem_reset",            # event_source: server
        "reset_problem",
        "reset_problem_fail",
        # Emitted after a user saves a problem.
        "problem_save",             # event_source: server
        "save_problem_fail",
        "save_problem_success",
        # Show answer
        "problem_show",
        "showanswer",
    )

    submissions = {}
    assessments = {}

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:

        for log_name in log_files:
            if current_date not in log_name:
                continue

            print(log_name)

            # The context manager closes the file even if a line fails to parse.
            with open(os.path.join(log_path, log_name), "r") as input_file:
                for line in input_file:

                    json_object = json.loads(line)

                    if json_object["event_type"] not in submission_event_types:
                        continue

                    # Skip events without a learner instead of raising KeyError;
                    # forum_sessions notes that some daily logs lack "user_id".
                    global_learner_id = json_object["context"].get("user_id", "")
                    if global_learner_id == "":
                        continue

                    course_id = json_object["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Only dictionary payloads can name the problem.
                    event = json_object["event"]
                    if not isinstance(event, dict):
                        continue

                    # Skip payloads without a problem_id rather than aborting
                    # the whole run on a KeyError.
                    question_id = event.get("problem_id", "")
                    if question_id == "":
                        continue

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop fraction / timezone.
                    event_time = json_object["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(event_time, "%Y-%m-%d %H:%M:%S")

                    submission_id = course_learner_id + "_" + question_id

                    # For submissions
                    submissions[submission_id] = [submission_id, course_learner_id, question_id, event_time]

                    # For assessments: only events that carry both "grade"
                    # and "max_grade" produce a row.
                    if "grade" in event and "max_grade" in event:
                        grade = event["grade"]
                        max_grade = event["max_grade"]
                        if grade != "" and max_grade != "":
                            assessments[submission_id] = [submission_id, course_learner_id, max_grade, grade]

        current_date = getNextDay(current_date)

    # Database version
    # Values are passed as parameters so quotes inside ids cannot break or
    # alter the statements.

    # Quiz_question table
    sql = "insert into quiz_questions(question_id, question_type) values (%s,%s)"
    for question_id, question_type in quiz_question_record:
        cursor.execute(sql, (question_id, question_type))

    # Submissions table
    sql = "insert into submissions(submission_id, course_learner_id, question_id, submission_timestamp) values (%s,%s,%s,%s)"
    for submission_id, course_learner_id, question_id, submission_timestamp in submissions.values():
        # Use the timestamp stored with this row, not a leftover loop variable.
        cursor.execute(sql, (submission_id, course_learner_id, question_id, submission_timestamp))

    # Assessments table
    sql = "insert into assessments(assessment_id, course_learner_id, max_grade, grade) values (%s,%s,%s,%s)"
    for assessment_id, course_learner_id, max_grade, grade in assessments.values():
        cursor.execute(sql, (assessment_id, course_learner_id, max_grade, grade))

    ''' 
def _forum_session_row(course_learner_id, start_time, end_time, times_search,
                       rel_element_cur, rel_element_pre):
    """Build the forum_sessions row for one finished session.

    Returns None when the session lasted 5 seconds or less; such sessions
    are not recorded.
    """
    elapsed = end_time - start_time
    duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
    if duration <= 5:
        return None

    session_id = "forum_session_" + course_learner_id + "_" + str(start_time) + "_" + str(end_time)

    # Prefer the element found inside the session, otherwise fall back to
    # the most recent non-empty element recorded so far.
    rel_element_id = rel_element_cur if rel_element_cur != "" else rel_element_pre

    return [session_id, course_learner_id, times_search, start_time, end_time, duration, rel_element_id]


def forum_sessions(metadata_path, log_path, cursor):
    """Detect forum sessions in the daily logs and insert them into forum_sessions.

    For every log file whose name contains a date between the course start
    date and end date (inclusive), the events are grouped per learner and
    sorted by time.  A session starts with a forum event, is extended by
    forum events that follow within 30 minutes, and ends either at the
    first non-forum event or when the next forum event arrives more than
    30 minutes later (that event then opens the next session).  Sessions
    of 5 seconds or less are discarded.

    Parameters:
        metadata_path: handed to ExtractCourseInformation.
        log_path: directory with the log files, one JSON event per line.
        cursor: database cursor used for the inserts ("%s" placeholders).

    NOTE(review): events are only carried over to the next file for
    learners that had at least one session boundary in the current file,
    so a session that is still open without any earlier boundary is
    dropped - confirm this is intended.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    start_date = course_metadata_map["start_date"]
    end_date = course_metadata_map["end_date"]

    current_date = start_date
    end_next_date = getNextDay(end_date)

    forum_event_types = (
        "edx.forum.comment.created",
        "edx.forum.response.created",
        "edx.forum.response.voted",
        "edx.forum.thread.created",
        "edx.forum.thread.voted",
        "edx.forum.searched",
    )

    # Maximum silence between two events of the same session.
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}

    forum_sessions_record = []

    log_files = os.listdir(log_path)

    while current_date != end_next_date:

        for log_name in log_files:

            if current_date not in log_name:
                continue

            print(log_name)

            # Start from the events carried over from the previous file.
            learner_all_event_logs = updated_learner_all_event_logs
            updated_learner_all_event_logs = {}

            # The context manager closes the file even if a line fails to parse.
            with open(os.path.join(log_path, log_name), "r") as log_file:

                for line in log_file:

                    json_object = json.loads(line)
                    context = json_object["context"]

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in context:
                        continue

                    global_learner_id = context["user_id"]
                    if global_learner_id == "":
                        continue

                    # For forum session separation: every forum event except
                    # a search is collapsed into "forum_activity".
                    event_type = str(json_object["event_type"])
                    if "/discussion/" in event_type or event_type in forum_event_types:
                        if event_type != "edx.forum.searched":
                            event_type = "forum_activity"

                    course_learner_id = context["course_id"] + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop fraction / timezone.
                    event_time = json_object["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(event_time, "%Y-%m-%d %H:%M:%S")

                    # Fields handed to courseElementsFinder for the relevant element
                    event_page = str(json_object["page"]) if "page" in json_object else ""
                    event_path = str(json_object["path"]) if "path" in json_object else ""
                    event_referer = str(json_object["referer"]) if "referer" in json_object else ""

                    learner_all_event_logs.setdefault(course_learner_id, []).append(
                        {"event_time": event_time, "event_type": event_type, "page": event_page, "path": event_path, "referer": event_referer})

            # For forum session separation
            for course_learner_id, event_logs in learner_all_event_logs.items():

                # The id is course_id + "_" + learner id; split on the last
                # underscore so course ids containing "_" are not truncated.
                course_id = course_learner_id.rsplit("_", 1)[0]

                # Sorting
                # NOTE(review): cmp_datetime is defined elsewhere; presumably a
                # plain chronological comparison - kept unchanged.
                event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))

                session_open = False
                start_time = None
                end_time = None
                times_search = 0

                # Time of the last event that closed a session, if any.
                final_time = None

                # Most recent non-empty element over all events handled so far.
                session_rel_element_pre = ""
                # First non-empty element among the forum events of the session.
                session_rel_element_cur = ""

                for log in event_logs:

                    rel_element_cur = courseElementsFinder(log, course_id)
                    event_time = log["event_time"]
                    is_search = log["event_type"] == "edx.forum.searched"
                    is_forum = is_search or log["event_type"] == "forum_activity"

                    if not session_open:

                        if is_forum:
                            # Initialization
                            session_open = True
                            start_time = event_time
                            end_time = event_time
                            times_search = 1 if is_search else 0
                            session_rel_element_cur = rel_element_cur

                    elif is_forum and event_time > end_time + session_gap:

                        # Too long since the last event: close the running
                        # session and let this event open the next one.
                        row = _forum_session_row(course_learner_id, start_time, end_time, times_search,
                                                 session_rel_element_cur, session_rel_element_pre)
                        if row is not None:
                            forum_sessions_record.append(row)

                        final_time = event_time

                        # Re-initialization.  The search counter restarts too,
                        # so the count of the closed session does not leak
                        # into the new one.
                        start_time = event_time
                        end_time = event_time
                        times_search = 1 if is_search else 0
                        session_rel_element_cur = rel_element_cur

                    elif is_forum:

                        # Forum event inside the running session.
                        end_time = event_time
                        if is_search:
                            times_search += 1
                        if session_rel_element_cur == "":
                            session_rel_element_cur = rel_element_cur

                    else:

                        # A non-forum event ends the session; it still extends
                        # the end time when it follows closely enough.
                        if event_time <= end_time + session_gap:
                            end_time = event_time

                        row = _forum_session_row(course_learner_id, start_time, end_time, times_search,
                                                 session_rel_element_cur, session_rel_element_pre)
                        if row is not None:
                            forum_sessions_record.append(row)

                        final_time = event_time

                        # Re-initialization
                        session_open = False
                        start_time = None
                        end_time = None
                        times_search = 0

                    # Remember the element of the most recent event that had one.
                    if rel_element_cur != "":
                        session_rel_element_pre = rel_element_cur

                # Carry the events from the last session boundary onwards
                # over to the next log file.
                if final_time is not None:
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs if log["event_time"] >= final_time]

        current_date = getNextDay(current_date)

    # Database version
    sql = "insert into forum_sessions (session_id, course_learner_id, times_search, start_time, end_time, duration, relevent_element_id) values (%s,%s,%s,%s,%s,%s,%s)"
    for array in forum_sessions_record:
        session_id = array[0]
        course_learner_id = array[1]
        times_search = process_null(array[2])
        start_time = array[3]
        end_time = array[4]
        duration = process_null(array[5])
        rel_element_id = array[6]
        data = (session_id, course_learner_id, times_search, start_time, end_time, duration, rel_element_id)
        cursor.execute(sql, data)
            
    # File version
    '''
def quiz_mode(metadata_path, log_path, cursor):
    """Fill the quiz_questions, submissions and assessments tables.

    First, every question listed in the course metadata is written to
    ``quiz_questions`` with its weight, the block type of the closest
    ancestor that has an entry in the block type map, and the first
    non-empty due date found while climbing to that ancestor.

    Then the daily log files are scanned, day by day from the course start
    date to its end date.  Every ``problem_check`` event that carries a
    learner id and a problem id becomes a row in ``submissions``; when the
    event also reports ``grade`` and ``max_grade`` a row is written to
    ``assessments`` under the same id.

    Args:
        metadata_path: handed to ExtractCourseInformation to load the course
            metadata map.
        log_path: directory holding the daily log files.  A file is processed
            for a given day when the current date value occurs in its name.
        cursor: database cursor; rows are written through
            ``cursor.execute(sql, params)``.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    quiz_question_map = course_metadata_map["quiz_question_map"]
    block_type_map = course_metadata_map["block_type_map"]
    element_time_map_due = course_metadata_map["element_time_map_due"]
    child_parent_map = course_metadata_map["child_parent_map"]

    question_sql = "insert into quiz_questions(question_id, question_type, question_weight, question_due) values (%s,%s,%s,%s)"
    submission_sql = "insert into submissions(submission_id, course_learner_id, question_id, submission_timestamp) values (%s,%s,%s,%s)"
    assessment_sql = "insert into assessments(assessment_id, course_learner_id, max_grade, grade) values (%s,%s,%s,%s)"

    for question_id in quiz_question_map:

        question_weight = quiz_question_map[question_id]
        quiz_question_parent = child_parent_map[question_id]

        question_due = ""
        if quiz_question_parent in element_time_map_due:
            question_due = element_time_map_due[quiz_question_parent]

        # Climb the course tree until an element with a known block type is
        # reached, picking up the first non-empty due date on the way.
        # NOTE(review): a KeyError is raised if the root is reached without
        # finding a typed ancestor -- confirm the metadata guarantees one.
        while quiz_question_parent not in block_type_map:
            quiz_question_parent = child_parent_map[quiz_question_parent]
            if question_due == "" and quiz_question_parent in element_time_map_due:
                question_due = element_time_map_due[quiz_question_parent]

        quiz_question_type = block_type_map[quiz_question_parent]
        question_due = process_null(question_due)

        data = (question_id, quiz_question_type, question_weight, question_due)
        cursor.execute(question_sql, data)

    # Only "problem_check" events are collected as submissions.  The other
    # problem events (save_problem_check, problem_check_fail, problem_graded,
    # problem_rescore, problem_reset, problem_save, problem_show, showanswer,
    # ...) are not collected here.
    submission_event_collection = ("problem_check",)

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    # Running counter that keeps submission ids unique across all log files.
    submission_uni_index = 0

    while current_date != end_next_date:

        for log_name in log_files:

            if current_date not in log_name:
                continue

            print(log_name)

            # The context manager closes the log even if a line fails to
            # parse; the file is streamed instead of being read in one go.
            with open(os.path.join(log_path, log_name), "r") as input_file:

                for line in input_file:

                    jsonObject = json.loads(line)

                    if jsonObject["event_type"] not in submission_event_collection:
                        continue

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue

                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep only "YYYY-MM-DDTHH:MM:SS" and parse it.
                    event_time = jsonObject["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(event_time, "%Y-%m-%d %H:%M:%S")

                    # Events whose payload is not a dict carry no problem id.
                    event = jsonObject["event"]
                    if not isinstance(event, dict):
                        continue

                    question_id = event["problem_id"]
                    if question_id == "":
                        continue

                    # "grade" and "max_grade" are specific to "problem_check"
                    grade = ""
                    max_grade = ""
                    if "grade" in event and "max_grade" in event:
                        grade = event["grade"]
                        max_grade = event["max_grade"]

                    submission_id = course_learner_id + "_" + question_id + "_" + str(submission_uni_index)
                    submission_uni_index = submission_uni_index + 1

                    # For submissions
                    data = (submission_id, course_learner_id, question_id, event_time)
                    cursor.execute(submission_sql, data)

                    # For assessments (the assessment shares the submission id)
                    if grade != "" and max_grade != "":
                        data = (submission_id, course_learner_id, max_grade, grade)
                        cursor.execute(assessment_sql, data)

        current_date = getNextDay(current_date)

    ''' 
Exemple #7
0
def forum_sessions(metadata_path, log_path, cursor):
    """Split each learner's forum activity into sessions and store them.

    The daily log files are processed day by day from the course start date
    to its end date.  For every learner the events are sorted by time and
    walked in order: a session opens on a forum event (a URL containing
    "/discussion/" or one of the edx.forum.* event types), is extended by
    further forum events that follow within 30 minutes, and is closed either
    by a forum event after a longer gap (which opens the next session) or by
    any non-forum event.  Closed sessions longer than 5 seconds are written
    to the ``forum_sessions`` table together with the number of searches and
    a related course element id.

    Limitations of the algorithm as written: events of a learner are carried
    over to the next day's file only from the last session boundary onwards,
    so a learner with no closed session in a file has nothing carried over,
    and a session that is still open when the last file ends is never stored.

    Args:
        metadata_path: handed to ExtractCourseInformation to load the course
            metadata map.
        log_path: directory holding the daily log files.  A file is processed
            for a given day when the current date value occurs in its name.
        cursor: database cursor; rows are written through
            ``cursor.execute(sql, params)``.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    forum_event_types = (
        "edx.forum.comment.created",
        "edx.forum.response.created",
        "edx.forum.response.voted",
        "edx.forum.thread.created",
        "edx.forum.thread.voted",
        "edx.forum.searched",
    )
    # Normalised event types that belong to a forum session.
    session_event_types = ("forum_activity", "edx.forum.searched")
    # Maximum silence between two events of the same session.
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}

    forum_sessions_record = []

    def store_session(session_id, course_learner_id, times_search,
                      start_time, end_time, rel_element_id):
        # Record a closed session unless it lasted 5 seconds or less.
        elapsed = end_time - start_time
        duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
        if duration > 5:
            forum_sessions_record.append([
                session_id + "_" + str(start_time) + "_" + str(end_time),
                course_learner_id,
                times_search,
                start_time,
                end_time,
                duration,
                rel_element_id,
            ])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:

        for log_name in log_files:

            if current_date not in log_name:
                continue

            print(log_name)

            # Start from the events carried over from the previous file.
            learner_all_event_logs = updated_learner_all_event_logs.copy()
            updated_learner_all_event_logs.clear()

            # The context manager closes the log even if a line fails to
            # parse; the file is streamed instead of being read in one go.
            with open(os.path.join(log_path, log_name), "r") as log_file:

                for line in log_file:

                    jsonObject = json.loads(line)

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue

                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    # For forum session separation: every forum event except
                    # a search is folded into the generic "forum_activity".
                    event_type = str(jsonObject["event_type"])
                    if "/discussion/" in event_type or event_type in forum_event_types:
                        if event_type != "edx.forum.searched":
                            event_type = "forum_activity"

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep only "YYYY-MM-DDTHH:MM:SS" and parse it.
                    event_time = jsonObject["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(
                        event_time, "%Y-%m-%d %H:%M:%S")

                    # added for relevant elements
                    event_page = ""
                    if "page" in jsonObject:
                        event_page = str(jsonObject["page"])

                    event_path = ""
                    if "path" in jsonObject:
                        event_path = str(jsonObject["path"])

                    event_referer = ""
                    if "referer" in jsonObject:
                        event_referer = str(jsonObject["referer"])

                    learner_all_event_logs.setdefault(course_learner_id, []).append({
                        "event_time": event_time,
                        "event_type": event_type,
                        "page": event_page,
                        "path": event_path,
                        "referer": event_referer,
                    })

            # For forum session separation
            for course_learner_id, event_logs in learner_all_event_logs.items():

                # NOTE(review): a course id that itself contains "_" is
                # truncated here -- confirm the id format.
                course_id = course_learner_id.split("_")[0]

                # Sorting
                # NOTE(review): cmp= exists only in Python 2; if cmp_datetime
                # is a plain ascending comparison it can simply be dropped.
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                # An empty session_id means "no session currently open".
                session_id = ""
                start_time = ""
                end_time = ""
                times_search = 0

                # Time of the last session boundary seen in these events.
                final_time = ""

                # Element seen most recently before the session.
                session_rel_element_pre = ""
                # Element mentioned inside the session.
                session_rel_element_cur = ""

                for log in event_logs:

                    rel_element_cur = courseElementsFinder(log, course_id)
                    event_time = log["event_time"]
                    is_forum_event = log["event_type"] in session_event_types
                    is_search = log["event_type"] == "edx.forum.searched"

                    if session_id == "":

                        if is_forum_event:
                            # Initialization
                            session_id = "forum_session_" + course_learner_id
                            start_time = event_time
                            end_time = event_time
                            if is_search:
                                times_search += 1
                            # Added for relevant element id
                            session_rel_element_cur = rel_element_cur

                    elif is_forum_event:

                        if event_time > end_time + session_gap:
                            # Gap too long: close the running session and
                            # open a new one with this event.
                            if session_rel_element_cur != "":
                                rel_element_id = session_rel_element_cur
                            else:
                                rel_element_id = session_rel_element_pre
                            store_session(session_id, course_learner_id,
                                          times_search, start_time, end_time,
                                          rel_element_id)

                            final_time = event_time

                            # Re-initialization.  The search counter must
                            # restart as well, otherwise the new session
                            # inherits the previous session's searches.
                            start_time = event_time
                            end_time = event_time
                            times_search = 1 if is_search else 0
                            session_rel_element_cur = rel_element_cur

                        else:
                            end_time = event_time
                            if is_search:
                                times_search += 1
                            if session_rel_element_cur == "":
                                session_rel_element_cur = rel_element_cur

                    else:
                        # A non-forum event closes the session; it only
                        # extends the session when it falls inside the gap.
                        if event_time <= end_time + session_gap:
                            end_time = event_time

                        if session_rel_element_cur != "":
                            rel_element_id = session_rel_element_cur
                        else:
                            rel_element_id = session_rel_element_pre
                        store_session(session_id, course_learner_id,
                                      times_search, start_time, end_time,
                                      rel_element_id)

                        final_time = event_time

                        # Re-initialization
                        session_id = ""
                        start_time = ""
                        end_time = ""
                        times_search = 0

                    # session_rel_element_pre records the element id of the
                    # most recent event before the session events.
                    if rel_element_cur != "":
                        session_rel_element_pre = rel_element_cur

                # Carry the events from the last boundary on to the next file.
                if final_time != "":
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # Database version
    sql = "insert into forum_sessions (session_id, course_learner_id, times_search, start_time, end_time, duration, relevent_element_id) values (%s,%s,%s,%s,%s,%s,%s)"
    for array in forum_sessions_record:
        data = (array[0], array[1], process_null(array[2]), array[3],
                array[4], process_null(array[5]), array[6])
        cursor.execute(sql, data)

    # File version
    '''
Exemple #8
0
def forum_sessions(metadata_path, log_path, cursor):
    """Split each learner's forum activity into sessions and store them.

    The daily log files are processed day by day from the course start date
    to its end date.  For every learner the events are sorted by time and
    walked in order: a session opens on a forum event (a URL containing
    "/discussion/" or one of the edx.forum.* event types), is extended by
    further forum events that follow within 30 minutes, and is closed either
    by a forum event after a longer gap (which opens the next session) or by
    any non-forum event.  Closed sessions longer than 5 seconds are written
    to the ``forum_sessions`` table together with the number of searches.

    Limitations of the algorithm as written: events of a learner are carried
    over to the next day's file only from the last session boundary onwards,
    so a learner with no closed session in a file has nothing carried over,
    and a session that is still open when the last file ends is never stored.

    Args:
        metadata_path: handed to ExtractCourseInformation to load the course
            metadata map.
        log_path: directory holding the daily log files.  A file is processed
            for a given day when the current date value occurs in its name.
        cursor: database cursor; rows are written through
            ``cursor.execute(sql, params)``.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    forum_event_types = (
        "edx.forum.comment.created",
        "edx.forum.response.created",
        "edx.forum.response.voted",
        "edx.forum.thread.created",
        "edx.forum.thread.voted",
        "edx.forum.searched",
    )
    # Normalised event types that belong to a forum session.
    session_event_types = ("forum_activity", "edx.forum.searched")
    # Maximum silence between two events of the same session.
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}

    forum_sessions_record = []

    def store_session(session_id, course_learner_id, times_search,
                      start_time, end_time):
        # Record a closed session unless it lasted 5 seconds or less.
        elapsed = end_time - start_time
        duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
        if duration > 5:
            forum_sessions_record.append([
                session_id + "_" + str(start_time) + "_" + str(end_time),
                course_learner_id,
                times_search,
                start_time,
                end_time,
                duration,
            ])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:

        for log_name in log_files:

            if current_date not in log_name:
                continue

            print(log_name)

            # Start from the events carried over from the previous file.
            learner_all_event_logs = updated_learner_all_event_logs.copy()
            updated_learner_all_event_logs.clear()

            # The context manager closes the log even if a line fails to
            # parse; the file is streamed instead of being read in one go.
            with open(os.path.join(log_path, log_name), "r") as log_file:

                for line in log_file:

                    jsonObject = json.loads(line)

                    # Some daily logs don't have the "user_id" value; skip
                    # those lines instead of failing with a KeyError.
                    if "user_id" not in jsonObject["context"]:
                        continue

                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    # For forum session separation: every forum event except
                    # a search is folded into the generic "forum_activity".
                    event_type = str(jsonObject["event_type"])
                    if "/discussion/" in event_type or event_type in forum_event_types:
                        if event_type != "edx.forum.searched":
                            event_type = "forum_activity"

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep only "YYYY-MM-DDTHH:MM:SS" and parse it.
                    event_time = jsonObject["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(event_time, "%Y-%m-%d %H:%M:%S")

                    learner_all_event_logs.setdefault(course_learner_id, []).append(
                        {"event_time": event_time, "event_type": event_type})

            # For forum session separation
            for course_learner_id, event_logs in learner_all_event_logs.items():

                # Sorting
                # NOTE(review): cmp= exists only in Python 2; if cmp_datetime
                # is a plain ascending comparison it can simply be dropped.
                event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))

                # An empty session_id means "no session currently open".
                session_id = ""
                start_time = ""
                end_time = ""
                times_search = 0

                # Time of the last session boundary seen in these events.
                final_time = ""

                for log in event_logs:

                    event_time = log["event_time"]
                    is_forum_event = log["event_type"] in session_event_types
                    is_search = log["event_type"] == "edx.forum.searched"

                    if session_id == "":

                        if is_forum_event:
                            # Initialization
                            session_id = "forum_session_" + course_learner_id
                            start_time = event_time
                            end_time = event_time
                            if is_search:
                                times_search += 1

                    elif is_forum_event:

                        if event_time > end_time + session_gap:
                            # Gap too long: close the running session and
                            # open a new one with this event.
                            store_session(session_id, course_learner_id,
                                          times_search, start_time, end_time)

                            final_time = event_time

                            # Re-initialization.  The search counter must
                            # restart as well, otherwise the new session
                            # inherits the previous session's searches.
                            start_time = event_time
                            end_time = event_time
                            times_search = 1 if is_search else 0

                        else:
                            end_time = event_time
                            if is_search:
                                times_search += 1

                    else:
                        # A non-forum event closes the session.  It extends
                        # the session only when it falls inside the gap, so
                        # a much later event cannot inflate the duration.
                        if event_time <= end_time + session_gap:
                            end_time = event_time

                        store_session(session_id, course_learner_id,
                                      times_search, start_time, end_time)

                        final_time = event_time

                        # Re-initialization
                        session_id = ""
                        start_time = ""
                        end_time = ""
                        times_search = 0

                # Carry the events from the last boundary on to the next file.
                if final_time != "":
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # Database version
    # Values are bound by the driver rather than interpolated into the SQL
    # text, so quotes in an id cannot break or alter the statement.
    sql = "insert into forum_sessions (session_id, course_learner_id, times_search, start_time, end_time, duration) values (%s,%s,%s,%s,%s,%s)"
    for array in forum_sessions_record:
        data = (array[0], array[1], array[2], array[3], array[4], array[5])
        cursor.execute(sql, data)
            
    # File version
    '''
Exemple #9
0
def sessions(metadata_path, log_path, cursor):
    """Rebuild learner sessions from the daily event logs and store them.

    The log files are visited date by date, from the course "start_date" up
    to and including the course "end_date" found in the course metadata.  The
    events of every learner are ordered by time and cut into sessions: a
    session is closed when the next event arrives more than 30 minutes after
    the previous one, or when a "page_close" event is met.  Sessions lasting
    5 seconds or less are dropped, repeated session ids are filtered out and
    the remaining sessions are inserted into the ``sessions`` table.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory holding the daily log files.  It is concatenated
            directly with each file name, so it has to end with a path
            separator.
        cursor: Database cursor; only ``cursor.execute(sql, params)`` is used.
    """

    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    # Maximum silence between two events of the same session.
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}
    session_record = []

    def close_session(course_learner_id, start_time, end_time):
        # Keep the session only when it lasted more than 5 seconds.
        elapsed = end_time - start_time
        duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
        if duration > 5:
            session_id = course_learner_id + "_" + str(
                start_time) + "_" + str(end_time)
            session_record.append([
                session_id, course_learner_id, start_time, end_time, duration
            ])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:

        for file_name in log_files:

            if current_date not in file_name:
                continue

            print(file_name)

            # Start from the events carried over from the previous file.
            learner_all_event_logs = updated_learner_all_event_logs.copy()
            updated_learner_all_event_logs.clear()

            # The context manager guarantees the log file is closed, even
            # when a line cannot be parsed.
            with open(log_path + file_name, "r") as input_file:

                for line in input_file:

                    json_object = json.loads(line)
                    context = json_object["context"]

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in context:
                        continue

                    global_learner_id = context["user_id"]
                    event_type = str(json_object["event_type"])

                    if global_learner_id == "":
                        continue

                    course_learner_id = context["course_id"] + "_" + str(
                        global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" only; anything after the
                    # seconds is discarded.
                    event_time = datetime.datetime.strptime(
                        json_object["time"][0:19].replace("T", " "),
                        "%Y-%m-%d %H:%M:%S")

                    learner_all_event_logs.setdefault(
                        course_learner_id, []).append({
                            "event_time": event_time,
                            "event_type": event_type
                        })

            for course_learner_id in learner_all_event_logs.keys():

                event_logs = learner_all_event_logs[course_learner_id]

                # Sorting
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                # None means "no session is currently open".
                start_time = None
                end_time = None

                # Time of the last event that closed a session, if any.
                final_time = None

                for event in event_logs:

                    event_time = event["event_time"]

                    if start_time is None:
                        # Initialization
                        start_time = event_time
                        end_time = event_time

                    elif event_time > end_time + session_gap:
                        # Too long a silence: the previous session is over
                        # and this event opens the next one.
                        close_session(course_learner_id, start_time, end_time)
                        final_time = event_time
                        start_time = event_time
                        end_time = event_time

                    elif event["event_type"] == "page_close":
                        # The page_close event itself belongs to the session
                        # it terminates.
                        close_session(course_learner_id, start_time,
                                      event_time)
                        final_time = event_time
                        start_time = None
                        end_time = None

                    else:
                        end_time = event_time

                # Events from the last session boundary onwards are handed
                # to the next file.
                # NOTE(review): when no boundary was found for this learner,
                # nothing is carried over, so a session still open at the
                # end of the file is neither recorded nor continued.  Kept
                # as is; confirm whether this is intended.
                if final_time is not None:
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # Database version; only the first record of every session id is stored.
    sql = "insert into sessions(session_id, course_learner_id, start_time, end_time, duration) values (%s,%s,%s,%s,%s)"
    session_id_set = set()
    for array in session_record:
        session_id = array[0]
        if session_id in session_id_set:
            continue
        session_id_set.add(session_id)

        course_learner_id = array[1]
        start_time = array[2]
        end_time = array[3]
        duration = process_null(array[4])
        data = (session_id, course_learner_id, start_time, end_time, duration)
        cursor.execute(sql, data)

    # File version
    '''
def quiz_sessions(metadata_path, log_path, cursor):
    """Rebuild per-learner quiz sessions from the daily logs and store them.

    A quiz session is opened by an event whose type names a problem block
    that has an entry in the course "child_parent_map"; the session is keyed
    by that parent block and the learner.  While a session is open, further
    problem-related events extend it unless they arrive more than 30 minutes
    after the previous event, in which case the session is closed and a new
    one is attempted from the current event.  Any other event closes the
    session (extending it first when it falls within the 30 minute window).
    Recorded intervals of the same session id that are at most 30 minutes
    apart are merged, and intervals longer than 5 seconds are inserted into
    the ``quiz_sessions`` table.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory holding the daily log files.  It is concatenated
            directly with each file name, so it has to end with a path
            separator.
        cursor: Database cursor; only ``cursor.execute(sql, params)`` is used.
    """

    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    # Event types that count as problem activity even though the event type
    # itself carries no problem id.
    submission_event_collection = frozenset([
        # Problem check
        "problem_check",  # Server
        "save_problem_check",
        "problem_check_fail",
        "save_problem_check_fail",
        # Emitted by the server each time a user selects Check for a problem
        # and it is graded successfully.
        "problem_graded",
        # Emitted by the server when a problem is successfully rescored.
        "problem_rescore",
        "problem_rescore_fail",
        "problem_reset",  # event_source: server
        "reset_problem",
        "reset_problem_fail",
        # Emitted by the server after a user saves a problem.
        "problem_save",  # event_source: server
        "save_problem_fail",
        "save_problem_success",
        # Show answer
        "problem_show",
        "showanswer",
    ])

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    child_parent_map = course_metadata_map["child_parent_map"]

    # Maximum silence between two events of the same session.
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}
    quiz_session_map = {}

    def is_problem_event(event_type):
        # True for problem URLs and for the listed submission events.
        return ("problem+block" in event_type
                or "_problem;_" in event_type
                or event_type in submission_event_collection)

    def resolve_session_id(event_type, course_learner_id):
        # Return the quiz session id the event belongs to, or "" when the
        # event type names no problem with a known parent block.  The
        # question id is computed afresh for every event, so an event
        # without a problem id can never reuse the id of an earlier event.
        question_id = None
        event_type_array = event_type.split("/")

        if "problem+block" in event_type:
            question_id = event_type_array[4]

        if "_problem;_" in event_type:
            question_id = event_type_array[6].replace(";_", "/")

        if question_id is not None and question_id in child_parent_map:
            parent_block_id = child_parent_map[question_id]
            return "quiz_session_" + parent_block_id + "_" + course_learner_id

        return ""

    def record_interval(session_id, course_learner_id, start_time, end_time):
        # Append one interval to the session, creating the entry on demand.
        entry = quiz_session_map.setdefault(session_id, {
            "course_learner_id": course_learner_id,
            "time_array": []
        })
        entry["time_array"].append({
            "start_time": start_time,
            "end_time": end_time
        })

    while current_date != end_next_date:

        for file_name in log_files:

            if current_date not in file_name:
                continue

            print(file_name)

            # Start from the events carried over from the previous file.
            learner_all_event_logs = updated_learner_all_event_logs.copy()
            updated_learner_all_event_logs.clear()

            # The context manager guarantees the log file is closed, even
            # when a line cannot be parsed.
            with open(log_path + file_name, "r") as input_file:

                for line in input_file:

                    json_object = json.loads(line)
                    context = json_object["context"]

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in context:
                        continue

                    global_learner_id = context["user_id"]
                    event_type = str(json_object["event_type"])

                    if global_learner_id == "":
                        continue

                    course_learner_id = context["course_id"] + "_" + str(
                        global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" only; anything after the
                    # seconds is discarded.
                    event_time = datetime.datetime.strptime(
                        json_object["time"][0:19].replace("T", " "),
                        "%Y-%m-%d %H:%M:%S")

                    learner_all_event_logs.setdefault(
                        course_learner_id, []).append({
                            "event_time": event_time,
                            "event_type": event_type
                        })

            # For quiz session separation
            for course_learner_id in learner_all_event_logs.keys():

                event_logs = learner_all_event_logs[course_learner_id]

                # Sorting
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                # An empty session_id means "no quiz session is open".
                session_id = ""
                start_time = None
                end_time = None

                # Time of the last event that closed a session, if any.
                final_time = None

                for event in event_logs:

                    event_type = event["event_type"]
                    event_time = event["event_time"]
                    problem_event = is_problem_event(event_type)

                    if session_id == "":
                        # Only a problem event with a known parent block
                        # can open a session.
                        if problem_event:
                            session_id = resolve_session_id(
                                event_type, course_learner_id)
                            if session_id != "":
                                start_time = event_time
                                end_time = event_time
                        continue

                    if problem_event:

                        if event_time > end_time + session_gap:
                            # Too long a silence: store the open session
                            # and try to open a new one from this event.
                            record_interval(session_id, course_learner_id,
                                            start_time, end_time)
                            final_time = event_time

                            session_id = resolve_session_id(
                                event_type, course_learner_id)
                            if session_id != "":
                                start_time = event_time
                                end_time = event_time
                            else:
                                start_time = None
                                end_time = None
                        else:
                            end_time = event_time

                    else:
                        # A non-problem event ends the session; it still
                        # extends it when it falls inside the time window.
                        if event_time <= end_time + session_gap:
                            end_time = event_time

                        record_interval(session_id, course_learner_id,
                                        start_time, end_time)
                        final_time = event_time

                        session_id = ""
                        start_time = None
                        end_time = None

                # Events from the last session boundary onwards are handed
                # to the next file; without a boundary nothing is kept.
                if final_time is not None:
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # To compress the session event_logs: merge consecutive intervals of the
    # same session id that are at most 30 minutes apart.
    for session_id in quiz_session_map.keys():
        time_array = quiz_session_map[session_id]["time_array"]
        if len(time_array) <= 1:
            continue

        updated_time_array = []
        start_time = time_array[0]["start_time"]
        end_time = time_array[0]["end_time"]

        for interval in time_array[1:]:
            if interval["start_time"] > end_time + session_gap:
                updated_time_array.append({
                    "start_time": start_time,
                    "end_time": end_time
                })
                start_time = interval["start_time"]
            end_time = interval["end_time"]

        # The interval still being built is always the last one.
        updated_time_array.append({
            "start_time": start_time,
            "end_time": end_time
        })

        quiz_session_map[session_id]["time_array"] = updated_time_array

    quiz_session_record = []

    for session_id in quiz_session_map.keys():
        course_learner_id = quiz_session_map[session_id]["course_learner_id"]
        for interval in quiz_session_map[session_id]["time_array"]:

            start_time = interval["start_time"]
            end_time = interval["end_time"]
            if start_time < end_time:
                elapsed = end_time - start_time
                duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
                final_session_id = session_id + "_" + str(
                    start_time) + "_" + str(end_time)

                # Keep the interval only when it lasted more than 5 seconds.
                if duration > 5:
                    quiz_session_record.append([
                        final_session_id, course_learner_id, start_time,
                        end_time, duration
                    ])

    # Database version
    sql = "insert into quiz_sessions (session_id, course_learner_id, start_time, end_time, duration) values (%s,%s,%s,%s,%s)"
    for array in quiz_session_record:
        session_id = array[0]
        course_learner_id = array[1]
        start_time = array[2]
        end_time = array[3]
        duration = process_null(array[4])
        data = (session_id, course_learner_id, start_time, end_time, duration)
        cursor.execute(sql, data)
    ''' 
def quiz_mode(metadata_path, log_path, cursor):
    """Fill the quiz_questions, submissions and assessments tables.

    Quiz questions come from the course metadata: for every question the
    chain of parents in "child_parent_map" is followed until a block listed
    in "block_type_map" is reached, which gives the question type; the due
    date is the first one found in "element_time_map_due" along that chain,
    starting with the direct parent.

    Submissions come from the daily log files between the course start and
    end dates: every "problem_check" event with a non-empty user id and a
    non-empty problem id yields one ``submissions`` row.  When the event
    also carries "grade" and "max_grade", an ``assessments`` row with the
    same id is inserted.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory holding the daily log files.  It is concatenated
            directly with each file name, so it has to end with a path
            separator.
        cursor: Database cursor; only ``cursor.execute(sql, params)`` is used.
    """

    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    quiz_question_map = course_metadata_map["quiz_question_map"]
    block_type_map = course_metadata_map["block_type_map"]
    element_time_map_due = course_metadata_map["element_time_map_due"]

    for question_id in quiz_question_map:

        question_due = ""
        question_weight = quiz_question_map[question_id]

        quiz_question_parent = course_metadata_map["child_parent_map"][
            question_id]

        if quiz_question_parent in element_time_map_due:
            question_due = element_time_map_due[quiz_question_parent]

        # Climb the parent chain until a block with a known type is found;
        # the closest due date met on the way wins.
        while quiz_question_parent not in block_type_map:
            quiz_question_parent = course_metadata_map["child_parent_map"][
                quiz_question_parent]
            if question_due == "" and quiz_question_parent in element_time_map_due:
                question_due = element_time_map_due[quiz_question_parent]

        quiz_question_type = block_type_map[quiz_question_parent]
        question_due = process_null(question_due)

        sql = "insert into quiz_questions(question_id, question_type, question_weight, question_due) values (%s,%s,%s,%s)"
        data = (question_id, quiz_question_type, question_weight, question_due)
        cursor.execute(sql, data)

    # Processing events data.  Only the server-side "problem_check" event is
    # treated as a submission; the other problem events (save/reset/rescore/
    # graded/show answer and their *_fail variants) are deliberately ignored.
    submission_event_collection = ["problem_check"]

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    # Running counter that makes every submission id unique across all files.
    submission_uni_index = 0

    while current_date != end_next_date:

        for file_name in log_files:

            if current_date not in file_name:
                continue

            print(file_name)

            # The context manager guarantees the log file is closed, even
            # when a line cannot be parsed or an insert fails.
            with open(log_path + file_name, "r") as input_file:

                for line in input_file:

                    json_object = json.loads(line)

                    if json_object["event_type"] not in submission_event_collection:
                        continue

                    context = json_object["context"]

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in context:
                        continue

                    global_learner_id = context["user_id"]
                    if global_learner_id == "":
                        continue

                    course_learner_id = context["course_id"] + "_" + str(
                        global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" only; anything after the
                    # seconds is discarded.
                    event_time = datetime.datetime.strptime(
                        json_object["time"][0:19].replace("T", " "),
                        "%Y-%m-%d %H:%M:%S")

                    question_id = ""
                    grade = ""
                    max_grade = ""

                    event = json_object["event"]
                    if isinstance(event, dict):
                        question_id = event["problem_id"]

                        # The fields "grade" and "max_grade" are specific to
                        # the submission event "problem_check"
                        if "grade" in event and "max_grade" in event:
                            grade = event["grade"]
                            max_grade = event["max_grade"]

                    if question_id == "":
                        continue

                    submission_id = course_learner_id + "_" + question_id + "_" + str(
                        submission_uni_index)
                    submission_uni_index = submission_uni_index + 1

                    # For submissions
                    sql = "insert into submissions(submission_id, course_learner_id, question_id, submission_timestamp) values (%s,%s,%s,%s)"
                    data = (submission_id, course_learner_id, question_id,
                            event_time)
                    cursor.execute(sql, data)

                    # For assessments; the assessment shares the id of its
                    # submission.
                    if grade != "" and max_grade != "":
                        sql = "insert into assessments(assessment_id, course_learner_id, max_grade, grade) values (%s,%s,%s,%s)"
                        data = (submission_id, course_learner_id, max_grade,
                                grade)
                        cursor.execute(sql, data)

        current_date = getNextDay(current_date)
    ''' 
def video_interaction(metadata_path, log_path, cursor):
    """Reconstruct video watching sessions from the daily logs and store them.

    For every date between the course start date and end date (both taken
    from ``ExtractCourseInformation(metadata_path)``), each file in
    ``log_path`` whose name contains that date is read line by line as JSON.
    Events are grouped per learner, sorted by time and walked in order:

    * a play event starts (or restarts) a session for its video;
    * a pause/stop event for the same video, or any non-video event, ends the
      session; the session is kept only if more than 5 seconds were watched;
    * an event more than 30 minutes after the play event abandons the session;
    * when a play event follows a pause, the pause length is attached to the
      interaction that ended at that pause if it lies strictly between 2 and
      600 seconds.

    Events that happened after the last session boundary of a learner are
    carried over and re-examined together with the next matching log file.
    Finally one row per interaction is inserted into ``video_interaction``.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory holding the log files. File names are appended
            with plain string concatenation, so it must end with a path
            separator.
        cursor: Database cursor; only ``cursor.execute(sql, params)`` is used.
    """

    def elapsed_seconds(later, earlier):
        # Whole seconds between two datetimes. ``timedelta.seconds`` on its own
        # drops the ``days`` component, so a gap of 1 day + 5 minutes would be
        # reported as 300 seconds.
        delta = later - earlier
        return delta.days * 86400 + delta.seconds

    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    video_interaction_map = {}

    # Video-related event types
    video_event_types = [
        "hide_transcript",
        "edx.video.transcript.hidden",
        "edx.video.closed_captions.hidden",
        "edx.video.closed_captions.shown",
        "load_video",
        "edx.video.loaded",
        "pause_video",
        "edx.video.paused",
        "play_video",
        "edx.video.played",
        "seek_video",
        "edx.video.position.changed",
        "show_transcript",
        "edx.video.transcript.shown",
        "speed_change_video",
        "stop_video",
        "edx.video.stopped",
        "video_hide_cc_menu",
        "edx.video.language_menu.hidden",
        "video_show_cc_menu",
        "edx.video.language_menu.shown",
    ]

    # Navigation events (page_close, seq_goto, seq_next, seq_prev) need no list
    # of their own: every event type outside video_event_types ends a session.
    play_event_types = ["play_video", "edx.video.played"]
    seek_event_types = ["seek_video", "edx.video.position.changed"]
    pause_event_types = ["pause_video", "edx.video.paused"]
    pause_stop_event_types = pause_event_types + ["stop_video", "edx.video.stopped"]

    learner_video_event_logs = {}
    updated_learner_video_event_logs = {}

    log_files = os.listdir(log_path)

    while True:

        if current_date == end_next_date:
            break

        for log_file_name in log_files:
            if current_date not in log_file_name:
                continue

            # Single-argument call form prints the same on Python 2 and 3.
            print(log_file_name)

            # Start from the events carried over from the previous file.
            learner_video_event_logs = updated_learner_video_event_logs
            updated_learner_video_event_logs = {}

            # The with-block closes the file even if a line fails to parse.
            with open(log_path + log_file_name, "r") as input_file:
                for line in input_file:

                    jsonObject = json.loads(line)

                    event_type = jsonObject["event_type"]
                    is_video_event = event_type in video_event_types

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue

                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop whatever follows.
                    event_time = jsonObject["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(event_time, "%Y-%m-%d %H:%M:%S")

                    record = {"event_time": event_time, "event_type": event_type}

                    if is_video_event:

                        video_id = ""

                        # For seek event
                        new_time = 0
                        old_time = 0

                        # For speed change event
                        new_speed = 0
                        old_speed = 0

                        # The payload is only decoded when it arrives as a
                        # JSON-encoded string; otherwise the defaults are kept.
                        if isinstance(jsonObject["event"], unicode):
                            event_jsonObject = json.loads(jsonObject["event"])

                            # First "-" becomes "://", every other "-" becomes "/".
                            video_id = event_jsonObject["id"]
                            video_id = video_id.replace("-", "://", 1)
                            video_id = video_id.replace("-", "/")

                            # For video seek event
                            if "new_time" in event_jsonObject and "old_time" in event_jsonObject:
                                new_time = event_jsonObject["new_time"]
                                old_time = event_jsonObject["old_time"]

                            # For video speed change event
                            if "new_speed" in event_jsonObject and "old_speed" in event_jsonObject:
                                new_speed = event_jsonObject["new_speed"]
                                old_speed = event_jsonObject["old_speed"]

                        record["video_id"] = video_id

                        if event_type in seek_event_types:
                            # A seek without both positions is dropped entirely.
                            if new_time is None or old_time is None:
                                continue
                            record["new_time"] = new_time
                            record["old_time"] = old_time
                        elif event_type == "speed_change_video":
                            record["new_speed"] = new_speed
                            record["old_speed"] = old_speed

                    learner_video_event_logs.setdefault(course_learner_id, []).append(record)

            for course_learner_id, event_logs in learner_video_event_logs.items():

                video_id = ""

                # Sorting
                event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))

                video_start_time = ""
                final_time = ""

                # For video seek event
                times_forward_seek = 0
                duration_forward_seek = 0
                times_backward_seek = 0
                duration_backward_seek = 0

                # For video speed change event
                speed_change_last_time = ""
                times_speed_up = 0
                times_speed_down = 0

                # For video pause event
                pause_check = False
                pause_start_time = ""
                duration_pause = 0

                for log in event_logs:

                    log_type = log["event_type"]
                    log_time = log["event_time"]

                    if log_type in play_event_types:

                        video_start_time = log_time
                        video_id = log["video_id"]

                        if pause_check:
                            # The interaction that ended at the pause is keyed
                            # by the pause time; it is only found when the
                            # resumed video is the one that was paused.
                            duration_pause = elapsed_seconds(log_time, pause_start_time)
                            video_interaction_id = course_learner_id + "_" + video_id + "_" + str(pause_start_time)

                            if 2 < duration_pause < 600 and video_interaction_id in video_interaction_map:
                                video_interaction_map[video_interaction_id]["times_pause"] = 1
                                video_interaction_map[video_interaction_id]["duration_pause"] = duration_pause

                            pause_check = False

                        continue

                    # Nothing to track until a play event has been seen.
                    if video_start_time == "":
                        continue

                    if log_time > video_start_time + datetime.timedelta(hours=0.5):
                        # More than 30 minutes since play: abandon the session.
                        # NOTE(review): the seek/speed counters are not reset
                        # here, so they carry into the learner's next session.
                        # Confirm this is intended.
                        video_start_time = ""
                        video_id = ""
                        final_time = log_time
                        continue

                    # 0. Seek
                    if log_type in seek_event_types and video_id == log["video_id"]:
                        # Forward seek event
                        if log["new_time"] > log["old_time"]:
                            times_forward_seek += 1
                            duration_forward_seek += log["new_time"] - log["old_time"]
                        # Backward seek event
                        if log["new_time"] < log["old_time"]:
                            times_backward_seek += 1
                            duration_backward_seek += log["old_time"] - log["new_time"]
                        continue

                    # 1. Speed change
                    if log_type == "speed_change_video" and video_id == log["video_id"]:
                        # Changes within 10 seconds of the previous one are
                        # not counted, but still move the reference time.
                        if speed_change_last_time == "" or elapsed_seconds(log_time, speed_change_last_time) > 10:
                            old_speed = log["old_speed"]
                            new_speed = log["new_speed"]
                            if old_speed < new_speed:
                                times_speed_up += 1
                            if old_speed > new_speed:
                                times_speed_down += 1
                        speed_change_last_time = log_time
                        continue

                    # 2. Pause/Stop situation  /  3/4. Page changed/Session closed
                    paused_or_stopped = log_type in pause_stop_event_types and video_id == log["video_id"]
                    if paused_or_stopped or log_type not in video_event_types:

                        video_end_time = log_time
                        watch_duration = elapsed_seconds(video_end_time, video_start_time)
                        video_interaction_id = course_learner_id + "_" + video_id + "_" + str(video_end_time)

                        if watch_duration > 5:
                            video_interaction_map[video_interaction_id] = {
                                "course_learner_id": course_learner_id,
                                "video_id": video_id,
                                "type": "video",
                                "watch_duration": watch_duration,
                                "times_forward_seek": times_forward_seek,
                                "duration_forward_seek": duration_forward_seek,
                                "times_backward_seek": times_backward_seek,
                                "duration_backward_seek": duration_backward_seek,
                                "times_speed_up": times_speed_up,
                                "times_speed_down": times_speed_down,
                                "start_time": video_start_time,
                                "end_time": video_end_time,
                            }

                        # Only a pause (not a stop or a non-video event) arms
                        # the pause-duration measurement for the next play.
                        if log_type in pause_event_types:
                            pause_check = True
                            pause_start_time = video_end_time

                        # For video seek event
                        times_forward_seek = 0
                        duration_forward_seek = 0
                        times_backward_seek = 0
                        duration_backward_seek = 0

                        # For video speed change event
                        speed_change_last_time = ""
                        times_speed_up = 0
                        times_speed_down = 0

                        # For video general information
                        video_start_time = ""
                        video_id = ""
                        final_time = log_time

                # Carry over only the events after the last session boundary.
                if final_time != "":
                    updated_learner_video_event_logs[course_learner_id] = [
                        log for log in event_logs if log["event_time"] > final_time
                    ]

        current_date = getNextDay(current_date)

    # Video_interaction table
    # Database version
    sql = "insert into video_interaction(interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    for interaction_id in video_interaction_map:
        interaction = video_interaction_map[interaction_id]

        # Pause information is only present when a resumed play matched.
        if "times_pause" in interaction:
            times_pause = interaction["times_pause"]
            duration_pause = interaction["duration_pause"]
        else:
            times_pause = 0
            duration_pause = 0

        data = (
            interaction_id,
            interaction["course_learner_id"],
            interaction["video_id"],
            process_null(interaction["watch_duration"]),
            process_null(interaction["times_forward_seek"]),
            process_null(interaction["duration_forward_seek"]),
            process_null(interaction["times_backward_seek"]),
            process_null(interaction["duration_backward_seek"]),
            process_null(interaction["times_speed_up"]),
            process_null(interaction["times_speed_down"]),
            process_null(times_pause),
            process_null(duration_pause),
            interaction["start_time"],
            interaction["end_time"],
        )
        cursor.execute(sql, data)
    # File version
    '''
Exemple #13
0
def main(argv):
    """Update the course database from the configured log directory.

    ``argv[0]`` is the path of a config file with two sections:

    * ``data``: ``path`` (root log directory; concatenated directly with
      course folder names, so it is expected to end with a path separator),
      ``remove_filtered_logs`` (the string ``"1"`` enables clean-up),
      ``log_update_list`` / ``metadata_update_list`` (JSON lists of course
      codes) and ``survey_update_map`` (JSON map: course code ->
      [pre_id_index, post_id_index]).
    * ``mysqld``: ``user``, ``password``, ``host``, ``database``.

    Records of the listed courses are removed first. Then every folder under
    the root other than ``daily_logs`` is treated as a course: its metadata
    and surveys are processed when listed in the config, and every course
    date not yet recorded in the course's ``course_processing_tracker`` file
    is filtered out of the shared ``daily_logs`` archive and handed to the
    table builders.
    """

    # Read configs
    config = ConfigParser.ConfigParser()
    config.read(argv[0])

    # All the configs are read as string
    course_log_path = config.get("data", "path")
    remove_filtered_logs = config.get("data", "remove_filtered_logs")
    log_update_list = json.loads(config.get("data", "log_update_list"))
    metadata_update_list = json.loads(config.get("data", "metadata_update_list"))
    survey_update_map = json.loads(config.get("data", "survey_update_map"))

    user = config.get("mysqld", "user")
    password = config.get("mysqld", "password")
    host = config.get("mysqld", "host")
    database = config.get("mysqld", "database")

    # Database
    # NOTE(review): no commit on this connection is visible in this part of
    # the function; confirm the writes are committed elsewhere.
    connection = mysql.connector.connect(user=user, password=password, host=host, database=database, charset='utf8mb4')
    cursor = connection.cursor()

    # Delete relevant records before updating the database
    # (print is called with a single parenthesised argument throughout, which
    # produces the same output under Python 2 and Python 3.)
    print("Removing log records...")
    for course_code in log_update_list:
        print(str("\t" + course_code))
        RemoveCourseRecords(course_log_path, course_code, "log", cursor)
    print("Removing metadata records...")
    for course_code in metadata_update_list:
        print(str("\t" + course_code))
        RemoveCourseRecords(course_log_path, course_code, "metadata", cursor)
    print("Removing survey records...")
    for course_code in survey_update_map.keys():
        print(str("\t" + course_code))
        RemoveCourseRecords(course_log_path, course_code, "survey", cursor)

    print("")

    folders = os.listdir(course_log_path)
    for folder in folders:
        if folder != "daily_logs":

            # Only for Mac OS
            if folder == ".DS_Store":
                continue

            course_code = folder

            print("Processing\t" + course_code)

            # A file named "course_processing_tracker" (JSON format) is created
            # for each course to keep track of the processing files
            tracker_path = str(course_log_path + course_code + "/course_processing_tracker")
            if not os.path.exists(tracker_path):
                # "status" tracks whether all of the course's daily log files
                # have been processed: False (not finished yet) / True (finished)
                tracker_map = {
                    "status": False,
                    "processed_dates": [],
                    "num_processed_dates": 0,
                }
                with open(tracker_path, "w") as output_file:
                    output_file.write(json.dumps(tracker_map))

            # Read the "course_processing_tracker" file
            with open(tracker_path, "r") as input_file:
                tracker_map = json.loads(input_file.read())

            metadata_path = str(course_log_path + course_code + "/metadata/")

            # Determine whether the course_structure file is present
            has_course_structure = any(
                "course_structure" in file_name for file_name in os.listdir(metadata_path)
            )
            if not has_course_structure:
                print("The course structure file is missing.\n")
                continue

            # Learner mode
            if course_code in metadata_update_list:
                print("Learner Mode processing...")
                learner_mode(metadata_path, course_code, cursor)

            # Survey mode
            survey_path = str(course_log_path + course_code + "/surveys/")
            if course_code in survey_update_map:
                print("Survey Mode processing...")
                pre_id_index = int(survey_update_map[course_code][0])
                post_id_index = int(survey_update_map[course_code][1])
                survey_mode(metadata_path, survey_path, cursor, pre_id_index, post_id_index)

            # Every date of this course has already been processed.
            if tracker_map["status"]:
                print("")
                continue

            # Retrieve the start/end date of the course
            course_metadata_map = ExtractCourseInformation(metadata_path)
            course_id = course_metadata_map["course_id"]
            start_date = course_metadata_map["start_date"]
            end_date = course_metadata_map["end_date"]

            unzip_file_path = str(course_log_path + course_code + "/unzip_daily_logs/")

            current_date = start_date
            while current_date <= end_date:

                current_date_string = str(current_date)[0:10]
                if current_date_string not in tracker_map["processed_dates"]:

                    daily_log_file = str("delftx-edx-events-" + current_date_string + ".log.gz")
                    compressed_log_path = str(course_log_path + "/daily_logs/" + daily_log_file)
                    if os.path.exists(compressed_log_path):

                        print(daily_log_file)

                        # Decompress log files
                        if not os.path.exists(unzip_file_path):
                            os.mkdir(unzip_file_path)

                        # Same file name without the trailing ".gz"
                        output_path = str(unzip_file_path + daily_log_file[0:-3])

                        if not os.path.exists(output_path):
                            # Write to a side file and rename once complete, so
                            # an interrupted run never leaves a truncated file
                            # that a later run would accept as finished.
                            partial_path = output_path + ".partial"
                            with open(partial_path, 'w') as output_file:
                                with gzip.open(compressed_log_path, 'r') as f:
                                    for line in f:
                                        jsonObject = json.loads(line)
                                        # Keep only the lines of this course.
                                        if course_id in jsonObject["context"]["course_id"]:
                                            output_file.write(line)
                            os.rename(partial_path, output_path)

                        daily_log_path = output_path

                        # Video_interaction table
                        # print "1.\t Video_interaction table processing..."
                        remaining_video_interaction_log_path = course_log_path + course_code + "/remaining_video_interaction_logs"
                        video_interaction(metadata_path, daily_log_path, remaining_video_interaction_log_path, cursor)

                        # Quiz mode
                        # print "2.\t Quiz mode processing..."
                        quiz_mode(daily_log_path, cursor)

                        # Quiz_sessions table
                        # print "3.\t Quiz_sessions table processing..."
                        remaining_quiz_session_log_path = course_log_path + course_code + "/remaining_quiz_session_logs"
                        quiz_sessions(metadata_path, daily_log_path, remaining_quiz_session_log_path, cursor)

                        # Forum_interaction table
                        # print "4.\t Forum_interaction table processing..."
                        forum_interaction(metadata_path, daily_log_path, cursor)

                        # Forum_sessions table
                        # print "5.\t Forum_sessions table processing..."
                        remaining_forum_session_log_path = course_log_path + course_code + "/remaining_forum_session_logs"
                        forum_sessions(metadata_path, daily_log_path, remaining_forum_session_log_path, cursor)

                        # Sessions table
                        # print "6.\t Sessions table processing..."
                        remaining_session_log_path = course_log_path + course_code + "/remaining_session_logs"
                        sessions(metadata_path, daily_log_path, remaining_session_log_path, cursor)

                        tracker_map["processed_dates"].append(current_date_string)

                current_date = getNextDay(current_date)

            # The course is finished once every date in its range is processed.
            if len(tracker_map["processed_dates"]) == getDayDiff(start_date, end_date) + 1:
                tracker_map["status"] = True

            # Rewrite the tracker only when new dates were processed.
            if tracker_map["num_processed_dates"] != len(tracker_map["processed_dates"]):
                tracker_map["num_processed_dates"] = len(tracker_map["processed_dates"])
                with open(tracker_path, "w") as output_file:
                    output_file.write(json.dumps(tracker_map))

            # Delete the decompressed files
            # Only this course's filtered copies are removed. The shared
            # compressed archive under "daily_logs" is left alone because the
            # remaining course folders still have to read from it.
            if remove_filtered_logs == "1":
                if os.path.isdir(unzip_file_path):
                    for log_file in os.listdir(unzip_file_path):
                        os.remove(str(unzip_file_path + log_file))

        print("")