def quiz_sessions(metadata_path, log_path, cursor): # Collect course information course_metadata_map = ExtractCourseInformation(metadata_path) # Processing events data submission_event_collection = [] # Problem check submission_event_collection.append("problem_check") # Server submission_event_collection.append("save_problem_check") submission_event_collection.append("problem_check_fail") submission_event_collection.append("save_problem_check_fail") # The server emits a problem_graded event each time a user selects Check for a problem and it is graded success- fully. submission_event_collection.append("problem_graded") # The server emits problem_rescore events when a problem is successfully rescored. submission_event_collection.append("problem_rescore") submission_event_collection.append("problem_rescore_fail") submission_event_collection.append("problem_reset") # event_source: serve submission_event_collection.append("reset_problem") submission_event_collection.append("reset_problem_fail") # The server emits problem_save events after a user saves a problem. 
submission_event_collection.append("problem_save") # event_source: server submission_event_collection.append("save_problem_fail") submission_event_collection.append("save_problem_success") # Show answer submission_event_collection.append("problem_show") submission_event_collection.append("showanswer") current_date = course_metadata_map["start_date"] end_next_date = getNextDay(course_metadata_map["end_date"]) log_files = os.listdir(log_path) child_parent_map = course_metadata_map["child_parent_map"] learner_all_event_logs = {} updated_learner_all_event_logs = {} quiz_sessions = {} while True: if current_date == end_next_date: break; for file in log_files: if current_date in file: print file learner_all_event_logs.clear() learner_all_event_logs = updated_learner_all_event_logs.copy() updated_learner_all_event_logs.clear() # Course_learner_id set course_learner_id_set = set() for course_learner_id in learner_all_event_logs.keys(): course_learner_id_set.add(course_learner_id) input_file = open(log_path + file,"r") lines = input_file.readlines() for line in lines: jsonObject = json.loads(line) global_learner_id = jsonObject["context"]["user_id"] event_type = str(jsonObject["event_type"]) if global_learner_id != "": course_id = jsonObject["context"]["course_id"] course_learner_id = course_id + "_" + str(global_learner_id) event_time = jsonObject["time"] event_time = event_time[0:19] event_time = event_time.replace("T", " ") event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S") if learner_all_event_logs.has_key(course_learner_id): learner_all_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type}) else: learner_all_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type}] # For quiz session separation for course_learner_id in learner_all_event_logs.keys(): event_logs = learner_all_event_logs[course_learner_id] # Sorting event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time')) 
session_id = "" start_time = "" end_time = "" final_time = "" for i in range(len(event_logs)): if session_id == "": if "problem+block" in event_logs[i]["event_type"] or "_problem;_" in event_logs[i]["event_type"]: event_type_array = event_logs[i]["event_type"].split("/") if "problem+block" in event_logs[i]["event_type"]: question_id = event_type_array[4] if "_problem;_" in event_logs[i]["event_type"]: question_id = event_type_array[6].replace(";_", "/") if question_id in child_parent_map.keys(): parent_block_id = child_parent_map[question_id] session_id = "quiz_session_" + parent_block_id + "_" + course_learner_id start_time = event_logs[i]["event_time"] end_time = event_logs[i]["event_time"] else: if "problem+block" in event_logs[i]["event_type"] or "_problem;_" in event_logs[i]["event_type"] or event_logs[i]["event_type"] in submission_event_collection: if event_logs[i]["event_time"] > end_time + datetime.timedelta(hours=0.5): if quiz_sessions.has_key(session_id): quiz_sessions[session_id]["time_array"].append({"start_time":start_time, "end_time":end_time}) else: quiz_sessions[session_id] = {"course_learner_id":course_learner_id, "time_array":[{"start_time":start_time, "end_time":end_time}]} final_time = event_logs[i]["event_time"] if "problem+block" in event_logs[i]["event_type"] or "_problem;_" in event_logs[i]["event_type"]: event_type_array = event_logs[i]["event_type"].split("/") question_id = event_type_array[4] if question_id in child_parent_map.keys(): parent_block_id = child_parent_map[question_id] session_id = "quiz_session_" + parent_block_id + "_" +course_learner_id start_time = event_logs[i]["event_time"] end_time = event_logs[i]["event_time"] else: session_id = "" start_time = "" end_time = "" else: end_time = event_logs[i]["event_time"] else: end_time = event_logs[i]["event_time"] if quiz_sessions.has_key(session_id): quiz_sessions[session_id]["time_array"].append({"start_time":start_time, "end_time":end_time}) else: quiz_sessions[session_id] = 
{"course_learner_id":course_learner_id, "time_array":[{"start_time":start_time, "end_time":end_time}]} final_time = event_logs[i]["event_time"] session_id = "" start_time = "" end_time = "" if final_time != "": new_logs = [] for log in event_logs: if log["event_time"] >= final_time: new_logs.append(log) updated_learner_all_event_logs[course_learner_id] = new_logs current_date = getNextDay(current_date) # To compress the session event_logs for session_id in quiz_sessions.keys(): if len(quiz_sessions[session_id]["time_array"]) > 1: start_time = "" end_time = "" updated_time_array = [] for i in range(len(quiz_sessions[session_id]["time_array"])): if i == 0: start_time = quiz_sessions[session_id]["time_array"][i]["start_time"] end_time = quiz_sessions[session_id]["time_array"][i]["end_time"] else: if quiz_sessions[session_id]["time_array"][i]["start_time"] > end_time + datetime.timedelta(hours=0.5): updated_time_array.append({"start_time":start_time, "end_time":end_time}) start_time = quiz_sessions[session_id]["time_array"][i]["start_time"] end_time = quiz_sessions[session_id]["time_array"][i]["end_time"] else: end_time = quiz_sessions[session_id]["time_array"][i]["end_time"] if i == len(quiz_sessions[session_id]["time_array"]) - 1: updated_time_array.append({"start_time":start_time, "end_time":end_time}) quiz_sessions[session_id]["time_array"] = updated_time_array quiz_session_record = [] for session_id in quiz_sessions.keys(): course_learner_id = quiz_sessions[session_id]["course_learner_id"] for i in range(len(quiz_sessions[session_id]["time_array"])): start_time = quiz_sessions[session_id]["time_array"][i]["start_time"] end_time = quiz_sessions[session_id]["time_array"][i]["end_time"] if start_time < end_time: duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds final_session_id = session_id + "_" + str(start_time) + "_" + str(end_time) if duration > 5: array = [final_session_id, course_learner_id, start_time, end_time, duration] 
quiz_session_record.append(array) # Database version for array in quiz_session_record: session_id = array[0] course_learner_id = array[1] start_time = array[2] end_time = array[3] duration = array[4] sql = "insert into quiz_sessions (session_id, course_learner_id, start_time, end_time, duration) values" sql += "('%s','%s','%s','%s','%s');" % (session_id, course_learner_id, start_time, end_time, duration) cursor.execute(sql) '''
def sessions(metadata_path, log_path, cursor): # Collect course information course_metadata_map = ExtractCourseInformation(metadata_path) current_date = course_metadata_map["start_date"] end_next_date = getNextDay(course_metadata_map["end_date"]) learner_all_event_logs = {} updated_learner_all_event_logs = {} session_record = [] log_files = os.listdir(log_path) while True: if current_date == end_next_date: break; for file in log_files: if current_date in file: print file learner_all_event_logs.clear() learner_all_event_logs = updated_learner_all_event_logs.copy() updated_learner_all_event_logs.clear() # Course_learner_id set course_learner_id_set = set() for course_learner_id in learner_all_event_logs.keys(): course_learner_id_set.add(course_learner_id) input_file = open(log_path + file,"r") lines = input_file.readlines() for line in lines: jsonObject = json.loads(line) global_learner_id = jsonObject["context"]["user_id"] event_type = str(jsonObject["event_type"]) if global_learner_id != "": course_id = jsonObject["context"]["course_id"] course_learner_id = course_id + "_" + str(global_learner_id) event_time = jsonObject["time"] event_time = event_time[0:19] event_time = event_time.replace("T", " ") event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S") if course_learner_id in course_learner_id_set: learner_all_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type}) else: learner_all_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type}] course_learner_id_set.add(course_learner_id) for course_learner_id in learner_all_event_logs.keys(): event_logs = learner_all_event_logs[course_learner_id] # Sorting event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time')) session_id = "" start_time = "" end_time = "" final_time = "" for i in range(len(event_logs)): if start_time == "": # Initialization start_time = event_logs[i]["event_time"] end_time = event_logs[i]["event_time"] else: 
if event_logs[i]["event_time"] > end_time + datetime.timedelta(hours=0.5): session_id = course_learner_id + "_" + str(start_time) + "_" + str(end_time) duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds if duration > 5: array = [session_id, course_learner_id, start_time, end_time, duration] session_record.append(array) final_time = event_logs[i]["event_time"] # Re-initialization session_id = "" start_time = event_logs[i]["event_time"] end_time = event_logs[i]["event_time"] else: if event_logs[i]["event_type"] == "page_close": end_time = event_logs[i]["event_time"] session_id = course_learner_id + "_" + str(start_time) + "_" + str(end_time) duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds if duration > 5: array = [session_id, course_learner_id, start_time, end_time, duration] session_record.append(array) # Re-initialization session_id = "" start_time = "" end_time = "" final_time = event_logs[i]["event_time"] else: end_time = event_logs[i]["event_time"] if final_time != "": new_logs = [] for log in event_logs: if log["event_time"] >= final_time: new_logs.append(log) updated_learner_all_event_logs[course_learner_id] = new_logs current_date = getNextDay(current_date) # Filter duplicated records updated_session_record = [] session_id_set = set() for array in session_record: session_id = array[0] if session_id not in session_id_set: session_id_set.add(session_id) updated_session_record.append(array) session_record = updated_session_record # Database version for array in session_record: session_id = array[0] course_learner_id = array[1] start_time = array[2] end_time = array[3] duration = array[4] sql = "insert into sessions(session_id, course_learner_id, start_time, end_time, duration) values" sql += "('%s','%s','%s','%s','%s');" % (session_id, course_learner_id, start_time, end_time, duration) cursor.execute(sql) # File version '''
def video_interaction(metadata_path, log_path, cursor): # Collect course information course_metadata_map = ExtractCourseInformation(metadata_path) current_date = course_metadata_map["start_date"] end_next_date = getNextDay(course_metadata_map["end_date"]) video_interaction_map = {} # Video-related event types video_event_types = [] video_event_types.append("play_video") video_event_types.append("edx.video.played") video_event_types.append("stop_video") video_event_types.append("edx.video.stopped") video_event_types.append("pause_video") video_event_types.append("edx.video.paused") video_event_types.append("seek_video") video_event_types.append("edx.video.position.changed") video_event_types.append("speed_change_video") # Navigation-related event types navigation_event_types = [] navigation_event_types.append("page_close") navigation_event_types.append("seq_goto") navigation_event_types.append("seq_next") navigation_event_types.append("seq_prev") learner_video_event_logs = {} updated_learner_video_event_logs = {} log_files = os.listdir(log_path) while True: if current_date == end_next_date: break; for file in log_files: if current_date in file: print file learner_video_event_logs.clear() learner_video_event_logs = updated_learner_video_event_logs.copy() updated_learner_video_event_logs.clear() # Course_learner_id set course_learner_id_set = set() for course_learner_id in learner_video_event_logs.keys(): course_learner_id_set.add(course_learner_id) input_file = open(log_path + file,"r") lines = input_file.readlines() for line in lines: jsonObject = json.loads(line) if jsonObject["event_type"] in video_event_types: global_learner_id = jsonObject["context"]["user_id"] if global_learner_id != "": course_id = jsonObject["context"]["course_id"] course_learner_id = course_id + "_" + str(global_learner_id) video_id = "" event_time = jsonObject["time"] event_time = event_time[0:19] event_time = event_time.replace("T", " ") event_time = 
datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S") event_type = jsonObject["event_type"] # For seek event new_time = 0 old_time = 0 # For speed change event new_speed = 0 old_speed = 0 # This sub-condition does not exist in log data # if isinstance(jsonObject["event"], dict): # video_id = jsonObject["event"]["id"] if isinstance(jsonObject["event"], unicode): event_jsonObject = json.loads(jsonObject["event"]) video_id = event_jsonObject["id"] video_id = video_id.replace("-", "://", 1) video_id = video_id.replace("-", "/") # For video seek event if "new_time" in event_jsonObject and "old_time" in event_jsonObject: new_time = event_jsonObject["new_time"] old_time = event_jsonObject["old_time"] # For video speed change event if "new_speed" in event_jsonObject and "old_speed" in event_jsonObject: new_speed = event_jsonObject["new_speed"] old_speed = event_jsonObject["old_speed"] # To record video seek event if event_type in ["seek_video","edx.video.position.changed"]: if new_time is not None and old_time is not None: if course_learner_id in course_learner_id_set: learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_time":new_time, "old_time":old_time}) else: learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_time":new_time, "old_time":old_time}] course_learner_id_set.add(course_learner_id) continue # To record video speed change event if event_type in ["speed_change_video"]: if course_learner_id in course_learner_id_set: learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_speed":new_speed, "old_speed":old_speed}) else: learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "video_id":video_id, "new_speed":new_speed, "old_speed":old_speed}] course_learner_id_set.add(course_learner_id) continue if 
course_learner_id in course_learner_id_set: learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "video_id":video_id}) else: learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "video_id":video_id}] course_learner_id_set.add(course_learner_id) # For navigation events if jsonObject["event_type"] in navigation_event_types: global_learner_id = jsonObject["context"]["user_id"] if global_learner_id != "": course_id = jsonObject["context"]["course_id"] course_learner_id = course_id + "_" + str(global_learner_id) event_time = jsonObject["time"] event_time = event_time[0:19] event_time = event_time.replace("T", " ") event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S") event_type = jsonObject["event_type"] if course_learner_id in course_learner_id_set: learner_video_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type}) else: learner_video_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type}] course_learner_id_set.add(course_learner_id) for course_learner_id in learner_video_event_logs.keys(): video_id = "" event_logs = learner_video_event_logs[course_learner_id] # Sorting event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time')) video_start_time = "" final_time = "" # For video seek event times_forward_seek = 0 duration_forward_seek = 0 times_backward_seek = 0 duration_backward_seek = 0 # For video speed change event speed_change_last_time = "" times_speed_up = 0 times_speed_down = 0 # For video pause event pause_check = False pause_start_time = "" duration_pause = 0 for log in event_logs: if log["event_type"] in ["play_video", "edx.video.played"]: video_start_time = log["event_time"] video_id = log["video_id"] if pause_check: duration_pause = (log["event_time"] - pause_start_time).seconds video_interaction_id = course_learner_id + "_" + video_id + "_" + str(pause_start_time) if 
duration_pause > 2 and duration_pause < 600: if video_interaction_id in video_interaction_map.keys(): video_interaction_map[video_interaction_id]["times_pause"] = 1 video_interaction_map[video_interaction_id]["duration_pause"] = duration_pause pause_check = False continue if video_start_time != "": if log["event_time"] > video_start_time + datetime.timedelta(hours=0.5): video_start_time = "" video_id = "" final_time = log["event_time"] else: # 0. Seek if log["event_type"] in ["seek_video", "edx.video.position.changed"] and video_id == log["video_id"]: # Forward seek event if log["new_time"] > log["old_time"]: times_forward_seek += 1 duration_forward_seek += log["new_time"] - log["old_time"] # Backward seek event if log["new_time"] < log["old_time"]: times_backward_seek += 1 duration_backward_seek += log["old_time"] - log["new_time"] continue # 1. Speed change if log["event_type"] == "speed_change_video" and video_id == log["video_id"]: if speed_change_last_time == "": speed_change_last_time = log["event_time"] old_speed = log["old_speed"] new_speed = log["new_speed"] if old_speed < new_speed: times_speed_up += 1 if old_speed > new_speed: times_speed_down += 1 else: if (log["event_time"] - speed_change_last_time).seconds > 10: old_speed = log["old_speed"] new_speed = log["new_speed"] if old_speed < new_speed: times_speed_up += 1 if old_speed > new_speed: times_speed_down += 1 speed_change_last_time = log["event_time"] continue # 2. 
Pause/Stop situation if log["event_type"] in ["pause_video", "edx.video.paused", "stop_video", "edx.video.stopped"] and video_id == log["video_id"]: watch_duration = (log["event_time"] - video_start_time).seconds video_end_time = log["event_time"] video_interaction_id = course_learner_id + "_" + video_id + "_" + str(video_end_time) if watch_duration > 5: video_interaction_map[video_interaction_id] = {"course_learner_id":course_learner_id, "video_id":video_id, "type": "video", "watch_duration":watch_duration, "times_forward_seek":times_forward_seek, "duration_forward_seek":duration_forward_seek, "times_backward_seek":times_backward_seek, "duration_backward_seek":duration_backward_seek, "times_speed_up":times_speed_up, "times_speed_down":times_speed_down, "start_time":video_start_time, "end_time":video_end_time} if log["event_type"] in ["pause_video", "edx.video.paused"]: pause_check = True pause_start_time = video_end_time # For video seek event times_forward_seek = 0 duration_forward_seek = 0 times_backward_seek = 0 duration_backward_seek = 0 # For video speed change event speed_change_last_time = "" times_speed_up = 0 times_speed_down = 0 # For video general information video_start_time ="" video_id = "" final_time = log["event_time"] continue # 3/4 Page changed/Session closed if log["event_type"] in navigation_event_types: video_end_time = log["event_time"] watch_duration = (video_end_time - video_start_time).seconds video_interaction_id = course_learner_id + "_" + video_id + "_" + str(video_end_time) if watch_duration > 5: video_interaction_map[video_interaction_id] = {"course_learner_id":course_learner_id, "video_id":video_id, "type": "video", "watch_duration":watch_duration, "times_forward_seek":times_forward_seek, "duration_forward_seek":duration_forward_seek, "times_backward_seek":times_backward_seek, "duration_backward_seek":duration_backward_seek, "times_speed_up":times_speed_up, "times_speed_down":times_speed_down, "start_time":video_start_time, 
"end_time":video_end_time} # For video seek event times_forward_seek = 0 duration_forward_seek = 0 times_backward_seek = 0 duration_backward_seek = 0 # For video speed change event speed_change_last_time = "" times_speed_up = 0 times_speed_down = 0 # For video general information video_start_time = "" video_id = "" final_time = log["event_time"] continue if final_time != "": new_logs = [] for log in event_logs: if log["event_time"] > final_time: new_logs.append(log) updated_learner_video_event_logs[course_learner_id] = new_logs current_date = getNextDay(current_date) video_interaction_record = [] for interaction_id in video_interaction_map.keys(): video_interaction_id = interaction_id course_learner_id = video_interaction_map[interaction_id]["course_learner_id"] video_id = video_interaction_map[interaction_id]["video_id"] duration = video_interaction_map[interaction_id]["watch_duration"] times_forward_seek = video_interaction_map[interaction_id]["times_forward_seek"] duration_forward_seek = video_interaction_map[interaction_id]["duration_forward_seek"] times_backward_seek = video_interaction_map[interaction_id]["times_backward_seek"] duration_backward_seek = video_interaction_map[interaction_id]["duration_backward_seek"] times_speed_up = video_interaction_map[interaction_id]["times_speed_up"] times_speed_down = video_interaction_map[interaction_id]["times_speed_down"] start_time = video_interaction_map[interaction_id]["start_time"] end_time = video_interaction_map[interaction_id]["end_time"] if "times_pause" in video_interaction_map[interaction_id]: times_pause = video_interaction_map[interaction_id]["watch_duration"] duration_pause = video_interaction_map[interaction_id]["watch_duration"] else: times_pause = 0 duration_pause = 0 array = [video_interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, 
end_time] video_interaction_record.append(array) # Video_interaction table # Database version for array in video_interaction_record: interaction_id = array[0] course_learner_id = array[1] video_id = array[2] duration = array[3] times_forward_seek = array[4] duration_forward_seek = array[5] times_backward_seek = array[6] duration_backward_seek = array[7] times_speed_up = array[8] times_speed_down = array[9] times_pause = array[10] duration_pause = array[11] start_time = array[12] end_time = array[13] sql = "insert into video_interaction(interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time) values" sql += "('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s');" % (interaction_id, course_learner_id, video_id, duration, times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time) cursor.execute(sql) # File version '''
def quiz_mode(metadata_path, log_path, cursor): quiz_question_record = [] submissions = {} assessments = {} # Collect course information course_metadata_map = ExtractCourseInformation(metadata_path) quiz_question_array = course_metadata_map["quiz_question_array"] block_type_map = course_metadata_map["block_type_map"] for question_id in quiz_question_array: quiz_question_parent = course_metadata_map["child_parent_map"][question_id] while not block_type_map.has_key(quiz_question_parent): quiz_question_parent = course_metadata_map["child_parent_map"][quiz_question_parent] quiz_question_type = block_type_map[quiz_question_parent] array = [question_id, quiz_question_type] quiz_question_record.append(array) # Processing events data submission_event_collection = [] # Problem check submission_event_collection.append("problem_check") # Server submission_event_collection.append("save_problem_check") submission_event_collection.append("problem_check_fail") submission_event_collection.append("save_problem_check_fail") # The server emits a problem_graded event each time a user selects Check for a problem and it is graded success- fully. submission_event_collection.append("problem_graded") # The server emits problem_rescore events when a problem is successfully rescored. submission_event_collection.append("problem_rescore") submission_event_collection.append("problem_rescore_fail") submission_event_collection.append("problem_reset") # event_source: serve submission_event_collection.append("reset_problem") submission_event_collection.append("reset_problem_fail") # The server emits problem_save events after a user saves a problem. 
submission_event_collection.append("problem_save") # event_source: server submission_event_collection.append("save_problem_fail") submission_event_collection.append("save_problem_success") # Show answer submission_event_collection.append("problem_show") submission_event_collection.append("showanswer") current_date = course_metadata_map["start_date"] end_next_date = getNextDay(course_metadata_map["end_date"]) log_files = os.listdir(log_path) while True: if current_date == end_next_date: break; for file in log_files: if current_date in file: print file input_file = open(log_path + file,"r") lines = input_file.readlines() for line in lines: jsonObject = json.loads(line) if jsonObject["event_type"] in submission_event_collection: global_learner_id = jsonObject["context"]["user_id"] if global_learner_id != "": course_id = jsonObject["context"]["course_id"] course_learner_id = course_id + "_" + str(global_learner_id) question_id = "" grade = "" max_grade = "" event_time = jsonObject["time"] event_time = event_time[0:19] event_time = event_time.replace("T", " ") event_time = datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S") if isinstance(jsonObject["event"], dict): question_id = jsonObject["event"]["problem_id"] # The fields "grade" and "max_grade" are specific to submission event "problem_check" if jsonObject["event"].has_key("grade") and jsonObject["event"].has_key("max_grade"): grade = jsonObject["event"]["grade"] max_grade = jsonObject["event"]["max_grade"] if question_id != "": submission_id = course_learner_id + "_" + question_id # For submissions array = [submission_id, course_learner_id, question_id, event_time] submissions[submission_id] = array # For assessments if grade != "" and max_grade != "": array = [submission_id, course_learner_id, max_grade, grade] assessments[submission_id] = array current_date = getNextDay(current_date) submission_record = [] assessment_record = [] for submission_id in submissions.keys(): 
submission_record.append(submissions[submission_id]) for assessment_id in assessments.keys(): assessment_record.append(assessments[assessment_id]) # Database version # Quiz_question table for array in quiz_question_record: question_id = array[0] question_type = array[1] sql = "insert into quiz_questions(question_id, question_type) values" sql += "('%s','%s');" % (question_id, question_type) cursor.execute(sql) # Submissions table for array in submission_record: submission_id = array[0] course_learner_id = array[1] question_id = array[2] submission_timestamp = array[3] sql = "insert into submissions(submission_id, course_learner_id, question_id, submission_timestamp) values" sql += "('%s','%s','%s','%s');" % (submission_id, course_learner_id, question_id, event_time) cursor.execute(sql) # Submissions table for array in assessment_record: assessment_id = array[0] course_learner_id = array[1] max_grade = array[2] grade = array[3] sql = "insert into assessments(assessment_id, course_learner_id, max_grade, grade) values" sql += "('%s','%s','%s','%s');" % (assessment_id, course_learner_id, max_grade, grade) cursor.execute(sql) '''
def forum_sessions(metadata_path, log_path, cursor): # Collect course information course_metadata_map = ExtractCourseInformation(metadata_path) start_date = course_metadata_map["start_date"] end_date = course_metadata_map["end_date"] current_date = start_date end_next_date = getNextDay(end_date) forum_event_types = [] forum_event_types.append("edx.forum.comment.created") forum_event_types.append("edx.forum.response.created") forum_event_types.append("edx.forum.response.voted") forum_event_types.append("edx.forum.thread.created") forum_event_types.append("edx.forum.thread.voted") forum_event_types.append("edx.forum.searched") learner_all_event_logs = {} updated_learner_all_event_logs = {} forum_sessions_record = [] log_files = os.listdir(log_path) while True: if current_date == end_next_date: break; for log_file in log_files: if current_date in log_file: print log_file learner_all_event_logs.clear() learner_all_event_logs = updated_learner_all_event_logs.copy() updated_learner_all_event_logs.clear() # Course_learner_id set course_learner_id_set = set() for course_learner_id in learner_all_event_logs.keys(): course_learner_id_set.add(course_learner_id) log_file = open(log_path + log_file,"r") lines = log_file.readlines() for line in lines: jsonObject = json.loads(line) # Some daily logs don't have the "user_id" value if "user_id" not in jsonObject["context"]: continue if jsonObject["context"]["user_id"] == "": continue # For forum session separation global_learner_id = jsonObject["context"]["user_id"] event_type = str(jsonObject["event_type"]) if "/discussion/" in event_type or event_type in forum_event_types: if event_type != "edx.forum.searched": event_type = "forum_activity" if global_learner_id != "": course_id = jsonObject["context"]["course_id"] course_learner_id = course_id + "_" + str(global_learner_id) event_time = jsonObject["time"] event_time = event_time[0:19] event_time = event_time.replace("T", " ") event_time = 
datetime.datetime.strptime(event_time,"%Y-%m-%d %H:%M:%S") # added for relevant elements event_page = "" if jsonObject.has_key("page"): event_page = str(jsonObject["page"]) event_path = "" if jsonObject.has_key("path"): event_path = str(jsonObject["path"]) event_referer = "" if jsonObject.has_key("referer"): event_referer = str(jsonObject["referer"]) if course_learner_id in course_learner_id_set: learner_all_event_logs[course_learner_id].append({"event_time":event_time, "event_type":event_type, "page":event_page, "path":event_path, "referer":event_referer}) else: learner_all_event_logs[course_learner_id] = [{"event_time":event_time, "event_type":event_type, "page":event_page, "path":event_path, "referer":event_referer}] course_learner_id_set.add(course_learner_id) # For forum session separation for learner in learner_all_event_logs.keys(): course_learner_id = learner event_logs = learner_all_event_logs[learner] course_id = course_learner_id.split("_")[0] # Sorting event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time')) session_id = "" start_time = "" end_time = "" times_search = 0 final_time = "" # represent the elements which just before the session. session_rel_element_pre = "" # represent the elements which is mentioned in the session. 
session_rel_element_cur = "" for i in range(len(event_logs)): rel_element_cur = courseElementsFinder(event_logs[i], course_id) if session_id == "": if event_logs[i]["event_type"] in ["forum_activity", "edx.forum.searched"]: # Initialization session_id = "forum_session_" + course_learner_id start_time = event_logs[i]["event_time"] end_time = event_logs[i]["event_time"] if event_logs[i]["event_type"] == "edx.forum.searched": times_search += 1 # Added for relevant element id session_rel_element_cur = rel_element_cur else: if event_logs[i]["event_type"] in ["forum_activity", "edx.forum.searched"]: if event_logs[i]["event_time"] > end_time + datetime.timedelta(hours=0.5): session_id = session_id + "_" + str(start_time) + "_" + str(end_time) duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds if duration > 5: rel_element_id = "" if session_rel_element_cur != "": rel_element_id = session_rel_element_cur else: rel_element_id = session_rel_element_pre array = [session_id, course_learner_id, times_search, start_time, end_time, duration, rel_element_id] forum_sessions_record.append(array) final_time = event_logs[i]["event_time"] # Re-initialization session_id = "forum_session_" + course_learner_id start_time = event_logs[i]["event_time"] end_time = event_logs[i]["event_time"] if event_logs[i]["event_type"] == "edx.forum.searched": times_search = 1 # Added for relevant element id session_rel_element_cur = rel_element_cur else: end_time = event_logs[i]["event_time"] if event_logs[i]["event_type"] == "edx.forum.searched": times_search += 1 if session_rel_element_cur == "": session_rel_element_cur = rel_element_cur else: if event_logs[i]["event_time"] <= end_time + datetime.timedelta(hours=0.5): end_time = event_logs[i]["event_time"] session_id = session_id + "_" + str(start_time) + "_" + str(end_time) duration = (end_time - start_time).days * 24 * 60 * 60 + (end_time - start_time).seconds if duration > 5: rel_element_id = "" if 
session_rel_element_cur != "": rel_element_id = session_rel_element_cur else: rel_element_id = session_rel_element_pre array = [session_id, course_learner_id, times_search, start_time, end_time, duration, rel_element_id] forum_sessions_record.append(array) final_time = event_logs[i]["event_time"] # Re-initialization session_id = "" start_time = "" end_time = "" times_search = 0 # session_rel_element_pre is used for recording the element id # of the most recent event logs before the session logs if rel_element_cur != "": session_rel_element_pre = rel_element_cur if final_time != "": new_logs = [] for log in event_logs: if log["event_time"] >= final_time: new_logs.append(log) updated_learner_all_event_logs[course_learner_id] = new_logs log_file.close() current_date = getNextDay(current_date) # Database version for array in forum_sessions_record: session_id = array[0] course_learner_id = array[1] times_search = process_null(array[2]) start_time = array[3] end_time = array[4] duration = process_null(array[5]) rel_element_id = array[6] sql = "insert into forum_sessions (session_id, course_learner_id, times_search, start_time, end_time, duration, relevent_element_id) values (%s,%s,%s,%s,%s,%s,%s)" data = (session_id, course_learner_id, times_search, start_time, end_time, duration, rel_element_id) cursor.execute(sql, data) # File version '''
def quiz_mode(metadata_path, log_path, cursor):
    """Fill the quiz_questions, submissions and assessments tables.

    One ``quiz_questions`` row is inserted per entry of the metadata's
    ``quiz_question_map``.  The daily log files between the course start and
    end dates are then scanned, and every ``problem_check`` event yields one
    ``submissions`` row plus, when the event carries both ``grade`` and
    ``max_grade``, one ``assessments`` row.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory containing the daily log files.  A file is
            processed for a day when that day's date string occurs in its
            name.
        cursor: Object whose ``execute(sql, params)`` performs the inserts.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    quiz_question_map = course_metadata_map["quiz_question_map"]
    block_type_map = course_metadata_map["block_type_map"]
    element_time_map_due = course_metadata_map["element_time_map_due"]
    child_parent_map = course_metadata_map["child_parent_map"]

    for question_id in quiz_question_map:
        question_weight = quiz_question_map[question_id]

        # Climb the ancestor chain until a block with a known type is found,
        # keeping the first (nearest) due date met on the way up.
        question_due = ""
        quiz_question_parent = child_parent_map[question_id]
        if quiz_question_parent in element_time_map_due:
            question_due = element_time_map_due[quiz_question_parent]
        while quiz_question_parent not in block_type_map:
            quiz_question_parent = child_parent_map[quiz_question_parent]
            if question_due == "" and quiz_question_parent in element_time_map_due:
                question_due = element_time_map_due[quiz_question_parent]

        quiz_question_type = block_type_map[quiz_question_parent]
        question_due = process_null(question_due)

        sql = "insert into quiz_questions(question_id, question_type, question_weight, question_due) values (%s,%s,%s,%s)"
        data = (question_id, quiz_question_type, question_weight, question_due)
        cursor.execute(sql, data)

    # Processing events data.
    # Only "problem_check" is collected; the remaining problem events
    # (save/reset/rescore/show-answer variants) were disabled in the original.
    submission_event_collection = ["problem_check"]

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    # Running counter that keeps submission ids unique across all log files.
    submission_uni_index = 0

    while current_date != end_next_date:
        for log_name in log_files:
            if current_date not in log_name:
                continue
            print(log_name)

            # "with" guarantees the handle is released even if a line fails
            # to parse; the original never closed it.
            with open(os.path.join(log_path, log_name), "r") as input_file:
                for line in input_file:
                    jsonObject = json.loads(line)
                    if jsonObject["event_type"] not in submission_event_collection:
                        continue

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop any fraction/zone.
                    event_time = datetime.datetime.strptime(
                        jsonObject["time"][0:19], "%Y-%m-%dT%H:%M:%S")

                    question_id = ""
                    grade = ""
                    max_grade = ""
                    event = jsonObject["event"]
                    if isinstance(event, dict):
                        question_id = event["problem_id"]
                        # The fields "grade" and "max_grade" are specific to
                        # submission event "problem_check"
                        if "grade" in event and "max_grade" in event:
                            grade = event["grade"]
                            max_grade = event["max_grade"]

                    if question_id == "":
                        continue

                    submission_id = course_learner_id + "_" + question_id + "_" + str(submission_uni_index)
                    submission_uni_index += 1

                    # For submissions
                    sql = "insert into submissions(submission_id, course_learner_id, question_id, submission_timestamp) values (%s,%s,%s,%s)"
                    data = (submission_id, course_learner_id, question_id, event_time)
                    cursor.execute(sql, data)

                    # For assessments
                    if grade != "" and max_grade != "":
                        sql = "insert into assessments(assessment_id, course_learner_id, max_grade, grade) values (%s,%s,%s,%s)"
                        data = (submission_id, course_learner_id, max_grade, grade)
                        cursor.execute(sql, data)

        current_date = getNextDay(current_date)
def forum_sessions(metadata_path, log_path, cursor):
    """Extract per-learner forum sessions from the daily logs and store them.

    Every event of a learner is collected; forum events (an event type that
    contains "/discussion/" or is listed in ``forum_event_types``) are
    relabelled "forum_activity", except "edx.forum.searched" which keeps its
    name so searches can be counted.  A session starts at a forum event and
    ends either when the next forum event is more than 30 minutes after the
    last one or when a non-forum event occurs.  Sessions longer than five
    seconds are inserted into ``forum_sessions``.

    Events from the last session boundary onwards are carried over to the
    next day's file so sessions spanning midnight are not cut.
    NOTE(review): a learner whose day contains no session boundary is not
    carried over at all, and a session still open after the final log file
    is never recorded - confirm this is intended.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory containing the daily log files.  A file is
            processed for a day when that day's date string occurs in its
            name.
        cursor: Object whose ``execute(sql, params)`` performs the inserts.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    forum_event_types = [
        "edx.forum.comment.created",
        "edx.forum.response.created",
        "edx.forum.response.voted",
        "edx.forum.thread.created",
        "edx.forum.thread.voted",
        "edx.forum.searched",
    ]
    session_event_types = ("forum_activity", "edx.forum.searched")
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}
    forum_sessions_record = []

    def record_session(learner_id, searches, start, end, rel_cur, rel_pre):
        # Keep the session only when it lasted longer than five seconds.
        elapsed = end - start
        duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
        if duration > 5:
            forum_sessions_record.append([
                "forum_session_" + learner_id + "_" + str(start) + "_" + str(end),
                learner_id,
                searches,
                start,
                end,
                duration,
                # Prefer an element seen inside the session, else the most
                # recent one seen before it.
                rel_cur if rel_cur != "" else rel_pre,
            ])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:
        for log_name in log_files:
            if current_date not in log_name:
                continue
            print(log_name)

            # Start the day from the events carried over from the previous one.
            learner_all_event_logs = updated_learner_all_event_logs
            updated_learner_all_event_logs = {}

            # "with" closes the handle even if a line fails to parse.
            with open(os.path.join(log_path, log_name), "r") as log_file:
                for line in log_file:
                    jsonObject = json.loads(line)

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    event_type = str(jsonObject["event_type"])
                    if "/discussion/" in event_type or event_type in forum_event_types:
                        if event_type != "edx.forum.searched":
                            event_type = "forum_activity"

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop any fraction/zone.
                    event_time = datetime.datetime.strptime(
                        jsonObject["time"][0:19], "%Y-%m-%dT%H:%M:%S")

                    # Added for relevant elements
                    event_page = str(jsonObject["page"]) if "page" in jsonObject else ""
                    event_path = str(jsonObject["path"]) if "path" in jsonObject else ""
                    event_referer = str(jsonObject["referer"]) if "referer" in jsonObject else ""

                    learner_all_event_logs.setdefault(course_learner_id, []).append({
                        "event_time": event_time,
                        "event_type": event_type,
                        "page": event_page,
                        "path": event_path,
                        "referer": event_referer,
                    })

            # For forum session separation
            for course_learner_id, event_logs in learner_all_event_logs.items():
                # The learner id is the last "_"-separated field; splitting
                # from the right keeps course ids that themselves contain an
                # underscore (e.g. ".../2013_Fall") intact.
                course_id = course_learner_id.rsplit("_", 1)[0]

                # Sorting (cmp= is Python 2 only, as is the rest of the file)
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                session_open = False
                start_time = ""
                end_time = ""
                times_search = 0
                final_time = ""
                # Element seen most recently before the session.
                session_rel_element_pre = ""
                # Element mentioned inside the session.
                session_rel_element_cur = ""

                for event in event_logs:
                    event_time = event["event_time"]
                    is_search = event["event_type"] == "edx.forum.searched"
                    is_forum = event["event_type"] in session_event_types
                    rel_element_cur = courseElementsFinder(event, course_id)

                    if not session_open:
                        if is_forum:
                            # Initialization
                            session_open = True
                            start_time = event_time
                            end_time = event_time
                            if is_search:
                                times_search += 1
                            session_rel_element_cur = rel_element_cur
                    elif is_forum:
                        if event_time > end_time + session_gap:
                            # Gap too long: close the session, open a new one.
                            record_session(course_learner_id, times_search,
                                           start_time, end_time,
                                           session_rel_element_cur,
                                           session_rel_element_pre)
                            final_time = event_time

                            # Re-initialization
                            start_time = event_time
                            end_time = event_time
                            # Restart the count so searches of the closed
                            # session do not leak into the new one.
                            times_search = 1 if is_search else 0
                            session_rel_element_cur = rel_element_cur
                        else:
                            end_time = event_time
                            if is_search:
                                times_search += 1
                            if session_rel_element_cur == "":
                                session_rel_element_cur = rel_element_cur
                    else:
                        # A non-forum event ends the session; it still
                        # extends the end time when it is within the gap.
                        if event_time <= end_time + session_gap:
                            end_time = event_time
                        record_session(course_learner_id, times_search,
                                       start_time, end_time,
                                       session_rel_element_cur,
                                       session_rel_element_pre)
                        final_time = event_time

                        # Re-initialization
                        session_open = False
                        start_time = ""
                        end_time = ""
                        times_search = 0

                    # session_rel_element_pre records the element id of the
                    # most recent event before the session logs
                    if rel_element_cur != "":
                        session_rel_element_pre = rel_element_cur

                if final_time != "":
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # Database version
    sql = "insert into forum_sessions (session_id, course_learner_id, times_search, start_time, end_time, duration, relevent_element_id) values (%s,%s,%s,%s,%s,%s,%s)"
    for array in forum_sessions_record:
        data = (array[0], array[1], process_null(array[2]), array[3],
                array[4], process_null(array[5]), array[6])
        cursor.execute(sql, data)
def forum_sessions(metadata_path, log_path, cursor):
    """Extract per-learner forum sessions (without element ids) and store them.

    Every event of a learner is collected; forum events (an event type that
    contains "/discussion/" or is listed in ``forum_event_types``) are
    relabelled "forum_activity", except "edx.forum.searched" which keeps its
    name so searches can be counted.  A session starts at a forum event and
    ends either when the next forum event is more than 30 minutes after the
    last one or at the first non-forum event, whose timestamp becomes the
    session end.  Sessions longer than five seconds are inserted into
    ``forum_sessions``.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory containing the daily log files.  A file is
            processed for a day when that day's date string occurs in its
            name.
        cursor: Object whose ``execute(sql, params)`` performs the inserts.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    forum_event_types = [
        "edx.forum.comment.created",
        "edx.forum.response.created",
        "edx.forum.response.voted",
        "edx.forum.thread.created",
        "edx.forum.thread.voted",
        "edx.forum.searched",
    ]
    session_event_types = ("forum_activity", "edx.forum.searched")
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}
    forum_sessions_record = []

    def record_session(learner_id, searches, start, end):
        # Keep the session only when it lasted longer than five seconds.
        elapsed = end - start
        duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
        if duration > 5:
            forum_sessions_record.append([
                "forum_session_" + learner_id + "_" + str(start) + "_" + str(end),
                learner_id,
                searches,
                start,
                end,
                duration,
            ])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:
        for log_name in log_files:
            if current_date not in log_name:
                continue
            print(log_name)

            # Start the day from the events carried over from the previous one.
            learner_all_event_logs = updated_learner_all_event_logs
            updated_learner_all_event_logs = {}

            # "with" closes the handle even if a line fails to parse.
            with open(os.path.join(log_path, log_name), "r") as log_file:
                for line in log_file:
                    jsonObject = json.loads(line)

                    # Some daily logs don't have the "user_id" value; skip
                    # those lines instead of failing with KeyError.
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    event_type = str(jsonObject["event_type"])
                    if "/discussion/" in event_type or event_type in forum_event_types:
                        if event_type != "edx.forum.searched":
                            event_type = "forum_activity"

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop any fraction/zone.
                    event_time = datetime.datetime.strptime(
                        jsonObject["time"][0:19], "%Y-%m-%dT%H:%M:%S")

                    learner_all_event_logs.setdefault(course_learner_id, []).append({
                        "event_time": event_time,
                        "event_type": event_type,
                    })

            # For forum session separation
            for course_learner_id, event_logs in learner_all_event_logs.items():
                # Sorting (cmp= is Python 2 only, as is the rest of the file)
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                session_open = False
                start_time = ""
                end_time = ""
                times_search = 0
                final_time = ""

                for event in event_logs:
                    event_time = event["event_time"]
                    is_search = event["event_type"] == "edx.forum.searched"
                    is_forum = event["event_type"] in session_event_types

                    if not session_open:
                        if is_forum:
                            # Initialization
                            session_open = True
                            start_time = event_time
                            end_time = event_time
                            if is_search:
                                times_search += 1
                    elif is_forum:
                        if event_time > end_time + session_gap:
                            # Gap too long: close the session, open a new one.
                            record_session(course_learner_id, times_search,
                                           start_time, end_time)
                            final_time = event_time

                            # Re-initialization
                            start_time = event_time
                            end_time = event_time
                            # Restart the count so searches of the closed
                            # session do not leak into the new one.
                            times_search = 1 if is_search else 0
                        else:
                            end_time = event_time
                            if is_search:
                                times_search += 1
                    else:
                        # The first non-forum event ends the session and
                        # supplies its end time.
                        end_time = event_time
                        record_session(course_learner_id, times_search,
                                       start_time, end_time)
                        final_time = event_time

                        # Re-initialization
                        session_open = False
                        start_time = ""
                        end_time = ""
                        times_search = 0

                if final_time != "":
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # Database version.  Values are passed as parameters rather than being
    # interpolated into the statement, so quoting is left to the driver.
    sql = "insert into forum_sessions (session_id, course_learner_id, times_search, start_time, end_time, duration) values (%s,%s,%s,%s,%s,%s)"
    for array in forum_sessions_record:
        cursor.execute(sql, tuple(array))
def sessions(metadata_path, log_path, cursor):
    """Extract general per-learner browsing sessions and store them.

    All events of a learner are ordered by time.  A session runs from its
    first event until either the next event is more than 30 minutes after
    the previous one or a "page_close" event occurs (which becomes the
    session end).  Sessions longer than five seconds are inserted into
    ``sessions``, after records with a duplicate session id are dropped.

    Events from the last session boundary onwards are carried over to the
    next day's file so sessions spanning midnight are not cut.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory containing the daily log files.  A file is
            processed for a day when that day's date string occurs in its
            name.
        cursor: Object whose ``execute(sql, params)`` performs the inserts.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}
    session_record = []

    def record_session(learner_id, start, end):
        # Keep the session only when it lasted longer than five seconds.
        elapsed = end - start
        duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
        if duration > 5:
            session_record.append([
                learner_id + "_" + str(start) + "_" + str(end),
                learner_id,
                start,
                end,
                duration,
            ])

    log_files = os.listdir(log_path)

    while current_date != end_next_date:
        for log_name in log_files:
            if current_date not in log_name:
                continue
            print(log_name)

            # Start the day from the events carried over from the previous one.
            learner_all_event_logs = updated_learner_all_event_logs
            updated_learner_all_event_logs = {}

            # "with" guarantees the handle is released; the original never
            # closed it.
            with open(os.path.join(log_path, log_name), "r") as input_file:
                for line in input_file:
                    jsonObject = json.loads(line)

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    event_type = str(jsonObject["event_type"])
                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop any fraction/zone.
                    event_time = datetime.datetime.strptime(
                        jsonObject["time"][0:19], "%Y-%m-%dT%H:%M:%S")

                    learner_all_event_logs.setdefault(course_learner_id, []).append({
                        "event_time": event_time,
                        "event_type": event_type,
                    })

            for course_learner_id, event_logs in learner_all_event_logs.items():
                # Sorting (cmp= is Python 2 only, as is the rest of the file)
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                start_time = ""
                end_time = ""
                final_time = ""

                for event in event_logs:
                    event_time = event["event_time"]

                    if start_time == "":
                        # Initialization
                        start_time = event_time
                        end_time = event_time
                    elif event_time > end_time + session_gap:
                        # Gap too long: close the session and start the next
                        # one at this event.
                        record_session(course_learner_id, start_time, end_time)
                        final_time = event_time

                        # Re-initialization
                        start_time = event_time
                        end_time = event_time
                    elif event["event_type"] == "page_close":
                        # Closing the page ends the session at this event.
                        end_time = event_time
                        record_session(course_learner_id, start_time, end_time)

                        # Re-initialization
                        start_time = ""
                        end_time = ""
                        final_time = event_time
                    else:
                        end_time = event_time

                if final_time != "":
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # Filter duplicated records (carried-over events can reproduce a session
    # id that was already recorded), keeping the first occurrence.
    updated_session_record = []
    session_id_set = set()
    for array in session_record:
        if array[0] not in session_id_set:
            session_id_set.add(array[0])
            updated_session_record.append(array)
    session_record = updated_session_record

    # Database version
    sql = "insert into sessions(session_id, course_learner_id, start_time, end_time, duration) values (%s,%s,%s,%s,%s)"
    for array in session_record:
        data = (array[0], array[1], array[2], array[3], process_null(array[4]))
        cursor.execute(sql, data)
def quiz_sessions(metadata_path, log_path, cursor):
    """Extract per-learner quiz sessions from the daily logs and store them.

    A quiz event is one whose type contains "problem+block" or "_problem;_"
    or is listed in ``submission_event_collection``.  A session starts at a
    quiz event whose question id (taken from the event-type URL) has a
    parent in ``child_parent_map``; it ends when the next quiz event is more
    than 30 minutes after the last one or when a non-quiz event occurs.
    Intervals of the same session id are merged when they are at most 30
    minutes apart, and merged intervals longer than five seconds are
    inserted into ``quiz_sessions``.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory containing the daily log files.  A file is
            processed for a day when that day's date string occurs in its
            name.
        cursor: Object whose ``execute(sql, params)`` performs the inserts.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)

    # Processing events data
    submission_event_collection = [
        # Problem check (browser and server variants)
        "problem_check",
        "save_problem_check",
        "problem_check_fail",
        "save_problem_check_fail",
        # Emitted each time a problem is graded successfully
        "problem_graded",
        # Emitted when a problem is rescored
        "problem_rescore",
        "problem_rescore_fail",
        "problem_reset",
        "reset_problem",
        "reset_problem_fail",
        # Emitted after a user saves a problem
        "problem_save",
        "save_problem_fail",
        "save_problem_success",
        # Show answer
        "problem_show",
        "showanswer",
    ]

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    log_files = os.listdir(log_path)

    child_parent_map = course_metadata_map["child_parent_map"]
    session_gap = datetime.timedelta(hours=0.5)

    learner_all_event_logs = {}
    updated_learner_all_event_logs = {}

    # session id -> {"course_learner_id": ..., "time_array": [intervals]}
    quiz_session_map = {}

    def is_quiz_event(event_type):
        return ("problem+block" in event_type
                or "_problem;_" in event_type
                or event_type in submission_event_collection)

    def session_id_for(event_type, learner_id):
        # Return the session id for a quiz event, or "" when no question id
        # with a known parent can be derived from it.  question_id starts
        # empty so a bare submission event (no URL) can neither reuse the id
        # of an earlier event nor hit an unbound name.
        question_id = ""
        event_type_array = event_type.split("/")
        if "problem+block" in event_type:
            question_id = event_type_array[4]
        if "_problem;_" in event_type:
            question_id = event_type_array[6].replace(";_", "/")
        if question_id in child_parent_map:
            return ("quiz_session_" + child_parent_map[question_id] + "_"
                    + learner_id)
        return ""

    def store_interval(session_id, learner_id, start, end):
        entry = quiz_session_map.setdefault(
            session_id, {"course_learner_id": learner_id, "time_array": []})
        entry["time_array"].append({"start_time": start, "end_time": end})

    while current_date != end_next_date:
        for log_name in log_files:
            if current_date not in log_name:
                continue
            print(log_name)

            # Start the day from the events carried over from the previous one.
            learner_all_event_logs = updated_learner_all_event_logs
            updated_learner_all_event_logs = {}

            # "with" guarantees the handle is released; the original never
            # closed it.
            with open(os.path.join(log_path, log_name), "r") as input_file:
                for line in input_file:
                    jsonObject = json.loads(line)

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    event_type = str(jsonObject["event_type"])
                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # Keep "YYYY-MM-DDTHH:MM:SS" and drop any fraction/zone.
                    event_time = datetime.datetime.strptime(
                        jsonObject["time"][0:19], "%Y-%m-%dT%H:%M:%S")

                    learner_all_event_logs.setdefault(course_learner_id, []).append({
                        "event_time": event_time,
                        "event_type": event_type,
                    })

            # For quiz session separation
            for course_learner_id, event_logs in learner_all_event_logs.items():
                # Sorting (cmp= is Python 2 only, as is the rest of the file)
                event_logs.sort(cmp=cmp_datetime,
                                key=operator.itemgetter('event_time'))

                session_id = ""
                start_time = ""
                end_time = ""
                final_time = ""

                for event in event_logs:
                    event_type = event["event_type"]
                    event_time = event["event_time"]
                    quiz_event = is_quiz_event(event_type)

                    if session_id == "":
                        if quiz_event:
                            session_id = session_id_for(event_type,
                                                        course_learner_id)
                            if session_id != "":
                                start_time = event_time
                                end_time = event_time
                    elif quiz_event:
                        if event_time > end_time + session_gap:
                            # Gap too long: close the interval and try to
                            # open a new session at this event.
                            store_interval(session_id, course_learner_id,
                                           start_time, end_time)
                            final_time = event_time

                            session_id = session_id_for(event_type,
                                                        course_learner_id)
                            if session_id != "":
                                start_time = event_time
                                end_time = event_time
                            else:
                                start_time = ""
                                end_time = ""
                        else:
                            end_time = event_time
                    else:
                        # A non-quiz event ends the session; it still extends
                        # the end time when it is within the gap.
                        if event_time <= end_time + session_gap:
                            end_time = event_time
                        store_interval(session_id, course_learner_id,
                                       start_time, end_time)
                        final_time = event_time

                        session_id = ""
                        start_time = ""
                        end_time = ""

                if final_time != "":
                    updated_learner_all_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] >= final_time
                    ]

        current_date = getNextDay(current_date)

    # To compress the session event_logs: merge consecutive intervals of one
    # session id that are at most 30 minutes apart.
    for session_id in quiz_session_map:
        time_array = quiz_session_map[session_id]["time_array"]
        if len(time_array) <= 1:
            continue

        start_time = time_array[0]["start_time"]
        end_time = time_array[0]["end_time"]
        updated_time_array = []
        for interval in time_array[1:]:
            if interval["start_time"] > end_time + session_gap:
                updated_time_array.append({
                    "start_time": start_time,
                    "end_time": end_time
                })
                start_time = interval["start_time"]
            end_time = interval["end_time"]
        # The interval still being built is always the last one to emit.
        updated_time_array.append({
            "start_time": start_time,
            "end_time": end_time
        })

        quiz_session_map[session_id]["time_array"] = updated_time_array

    quiz_session_record = []
    for session_id in quiz_session_map:
        course_learner_id = quiz_session_map[session_id]["course_learner_id"]
        for interval in quiz_session_map[session_id]["time_array"]:
            start_time = interval["start_time"]
            end_time = interval["end_time"]
            if start_time < end_time:
                elapsed = end_time - start_time
                duration = elapsed.days * 24 * 60 * 60 + elapsed.seconds
                final_session_id = (session_id + "_" + str(start_time) + "_"
                                    + str(end_time))
                if duration > 5:
                    quiz_session_record.append([
                        final_session_id, course_learner_id, start_time,
                        end_time, duration
                    ])

    # Database version
    sql = "insert into quiz_sessions (session_id, course_learner_id, start_time, end_time, duration) values (%s,%s,%s,%s,%s)"
    for array in quiz_session_record:
        data = (array[0], array[1], array[2], array[3], process_null(array[4]))
        cursor.execute(sql, data)
def quiz_mode(metadata_path, log_path, cursor):
    """Fill the quiz_questions, submissions and assessments tables.

    Step 1: one ``quiz_questions`` row is inserted for every entry of the
    course metadata's ``quiz_question_map``.  The question type is the
    ``block_type_map`` entry of the nearest ancestor (via
    ``child_parent_map``) that has one; the due date is the first value
    found in ``element_time_map_due`` while walking up to that ancestor
    (empty if none is found, then passed through ``process_null``).

    Step 2: for every date from the course start date up to and including
    the end date, each file in ``log_path`` whose name contains that date is
    read line by line as JSON.  Every ``problem_check`` event that has a
    non-empty ``context.user_id`` and a dict ``event`` payload yields one
    ``submissions`` row, plus one ``assessments`` row when the payload
    carries both ``grade`` and ``max_grade``.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory that contains the log files.
        cursor: Database cursor used for every insert.  Nothing is committed
            here.

    Raises:
        KeyError: If a question's ancestor chain in ``child_parent_map`` ends
            before reaching a block listed in ``block_type_map``, or if a
            log line lacks one of the fields accessed below.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    quiz_question_map = course_metadata_map["quiz_question_map"]
    block_type_map = course_metadata_map["block_type_map"]
    element_time_map_due = course_metadata_map["element_time_map_due"]
    child_parent_map = course_metadata_map["child_parent_map"]

    # Quiz_questions table
    question_sql = "insert into quiz_questions(question_id, question_type, question_weight, question_due) values (%s,%s,%s,%s)"
    for question_id in quiz_question_map:
        question_due = ""
        question_weight = quiz_question_map[question_id]

        # Climb the course tree until an ancestor with a known block type is
        # reached, remembering the first due date met on the way.
        quiz_question_parent = child_parent_map[question_id]
        if quiz_question_parent in element_time_map_due:
            question_due = element_time_map_due[quiz_question_parent]
        while quiz_question_parent not in block_type_map:
            quiz_question_parent = child_parent_map[quiz_question_parent]
            if question_due == "" and quiz_question_parent in element_time_map_due:
                question_due = element_time_map_due[quiz_question_parent]

        quiz_question_type = block_type_map[quiz_question_parent]
        question_due = process_null(question_due)

        data = (question_id, quiz_question_type, question_weight, question_due)
        cursor.execute(question_sql, data)

    # Only "problem_check" events are processed.  The other problem-related
    # event types (save_problem_check, problem_check_fail,
    # save_problem_check_fail, problem_graded, problem_rescore,
    # problem_rescore_fail, problem_reset, reset_problem, reset_problem_fail,
    # problem_save, save_problem_fail, save_problem_success, problem_show,
    # showanswer) are disabled in this function.
    submission_event_collection = ["problem_check"]

    submission_sql = "insert into submissions(submission_id, course_learner_id, question_id, submission_timestamp) values (%s,%s,%s,%s)"
    assessment_sql = "insert into assessments(assessment_id, course_learner_id, max_grade, grade) values (%s,%s,%s,%s)"

    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])
    log_files = os.listdir(log_path)

    # Running counter that keeps submission ids unique across all files.
    submission_uni_index = 0

    while current_date != end_next_date:
        for log_file in log_files:
            if current_date not in log_file:
                continue
            print(log_file)

            # The context manager closes the handle even when a line fails
            # to parse; iterating the handle avoids loading the whole file.
            with open(os.path.join(log_path, log_file), "r") as input_file:
                for line in input_file:
                    jsonObject = json.loads(line)
                    if jsonObject["event_type"] not in submission_event_collection:
                        continue

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    # "2015-01-31T12:34:56.789+00:00" -> datetime; the
                    # fractional seconds and offset are cut off.
                    event_time = jsonObject["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(
                        event_time, "%Y-%m-%d %H:%M:%S")

                    question_id = ""
                    grade = ""
                    max_grade = ""
                    event = jsonObject["event"]
                    if isinstance(event, dict):
                        question_id = event["problem_id"]
                        # The fields "grade" and "max_grade" are specific to
                        # the submission event "problem_check"
                        if "grade" in event and "max_grade" in event:
                            grade = event["grade"]
                            max_grade = event["max_grade"]

                    if question_id == "":
                        continue

                    submission_id = course_learner_id + "_" + question_id + "_" + str(submission_uni_index)
                    submission_uni_index += 1

                    # For submissions
                    data = (submission_id, course_learner_id, question_id, event_time)
                    cursor.execute(submission_sql, data)

                    # For assessments (the assessment shares the submission id)
                    if grade != "" and max_grade != "":
                        data = (submission_id, course_learner_id, max_grade, grade)
                        cursor.execute(assessment_sql, data)

        current_date = getNextDay(current_date)
def video_interaction(metadata_path, log_path, cursor):
    """Derive per-learner video watching sessions and store them.

    For every date from the course start date up to and including the end
    date, each file in ``log_path`` whose name contains that date is read
    line by line as JSON.  All events with a non-empty ``context.user_id``
    are grouped per learner (``<course_id>_<user_id>``); video events also
    keep their video id and, for seek / speed-change events, the old and
    new position / speed.

    Each learner's events are then replayed in time order:

    * a play event starts (or restarts) a watching session;
    * an event more than 30 minutes after the session start abandons it;
    * seek and speed-change events on the same video update counters;
    * a pause/stop on the same video, or any non-video event, closes the
      session, which is recorded when it lasted more than 5 seconds;
    * a play event following a pause attaches a pause of 3..599 seconds to
      the interaction that the pause closed, if that one was recorded.

    Events later than the last closed/abandoned session are carried over to
    the next processed file.  Finally one ``video_interaction`` row is
    inserted per recorded session.

    Args:
        metadata_path: Passed unchanged to ``ExtractCourseInformation``.
        log_path: Directory that contains the log files.
        cursor: Database cursor used for every insert.  Nothing is committed
            here.
    """
    # Collect course information
    course_metadata_map = ExtractCourseInformation(metadata_path)
    current_date = course_metadata_map["start_date"]
    end_next_date = getNextDay(course_metadata_map["end_date"])

    # interaction id -> attributes of one recorded watching session
    video_interaction_map = {}

    # Video-related event types
    video_event_types = [
        "hide_transcript",
        "edx.video.transcript.hidden",
        "edx.video.closed_captions.hidden",
        "edx.video.closed_captions.shown",
        "load_video",
        "edx.video.loaded",
        "pause_video",
        "edx.video.paused",
        "play_video",
        "edx.video.played",
        "seek_video",
        "edx.video.position.changed",
        "show_transcript",
        "edx.video.transcript.shown",
        "speed_change_video",
        "stop_video",
        "edx.video.stopped",
        "video_hide_cc_menu",
        "edx.video.language_menu.hidden",
        "video_show_cc_menu",
        "edx.video.language_menu.shown",
    ]
    play_event_types = ["play_video", "edx.video.played"]
    seek_event_types = ["seek_video", "edx.video.position.changed"]
    pause_event_types = ["pause_video", "edx.video.paused"]
    pause_stop_event_types = ["pause_video", "edx.video.paused",
                              "stop_video", "edx.video.stopped"]
    # A separate list of navigation event types (page_close, seq_goto,
    # seq_next, seq_prev) is disabled: every non-video event is treated as
    # leaving the video.

    learner_video_event_logs = {}
    updated_learner_video_event_logs = {}

    log_files = os.listdir(log_path)

    while current_date != end_next_date:
        for log_file in log_files:
            if current_date not in log_file:
                continue
            print(log_file)

            # Start from the events carried over from the previous file.
            learner_video_event_logs = updated_learner_video_event_logs
            updated_learner_video_event_logs = {}

            # ---- 1. Collect the events of this file, grouped per learner
            # The context manager closes the handle even when a line fails
            # to parse.
            with open(os.path.join(log_path, log_file), "r") as input_file:
                for line in input_file:
                    jsonObject = json.loads(line)
                    event_type = jsonObject["event_type"]

                    # Some daily logs don't have the "user_id" value
                    if "user_id" not in jsonObject["context"]:
                        continue
                    global_learner_id = jsonObject["context"]["user_id"]
                    if global_learner_id == "":
                        continue

                    course_id = jsonObject["context"]["course_id"]
                    course_learner_id = course_id + "_" + str(global_learner_id)

                    event_time = jsonObject["time"][0:19].replace("T", " ")
                    event_time = datetime.datetime.strptime(
                        event_time, "%Y-%m-%d %H:%M:%S")

                    entry = {"event_time": event_time, "event_type": event_type}

                    if event_type in video_event_types:
                        video_id = ""
                        # For seek event
                        new_time = 0
                        old_time = 0
                        # For speed change event
                        new_speed = 0
                        old_speed = 0

                        # The video payload is a JSON document stored as a
                        # string; a dict payload does not occur in log data.
                        if isinstance(jsonObject["event"], unicode):
                            event_jsonObject = json.loads(jsonObject["event"])
                            video_id = event_jsonObject["id"]
                            # First "-" becomes "://", the others become "/".
                            video_id = video_id.replace("-", "://", 1)
                            video_id = video_id.replace("-", "/")
                            # For video seek event
                            if "new_time" in event_jsonObject and "old_time" in event_jsonObject:
                                new_time = event_jsonObject["new_time"]
                                old_time = event_jsonObject["old_time"]
                            # For video speed change event
                            if "new_speed" in event_jsonObject and "old_speed" in event_jsonObject:
                                new_speed = event_jsonObject["new_speed"]
                                old_speed = event_jsonObject["old_speed"]

                        entry["video_id"] = video_id

                        if event_type in seek_event_types:
                            # A seek without both positions is dropped.
                            if new_time is None or old_time is None:
                                continue
                            entry["new_time"] = new_time
                            entry["old_time"] = old_time
                        elif event_type == "speed_change_video":
                            entry["new_speed"] = new_speed
                            entry["old_speed"] = old_speed

                    learner_video_event_logs.setdefault(
                        course_learner_id, []).append(entry)

            # ---- 2. Replay each learner's events in time order
            for course_learner_id in learner_video_event_logs.keys():
                video_id = ""
                event_logs = learner_video_event_logs[course_learner_id]

                # Sorting
                event_logs.sort(cmp=cmp_datetime, key=operator.itemgetter('event_time'))

                video_start_time = ""
                final_time = ""

                # For video seek event
                times_forward_seek = 0
                duration_forward_seek = 0
                times_backward_seek = 0
                duration_backward_seek = 0

                # For video speed change event
                speed_change_last_time = ""
                times_speed_up = 0
                times_speed_down = 0

                # For video pause event
                pause_check = False
                pause_start_time = ""
                duration_pause = 0

                for log in event_logs:

                    if log["event_type"] in play_event_types:
                        video_start_time = log["event_time"]
                        video_id = log["video_id"]

                        if pause_check:
                            # The interaction closed by the pause is keyed by
                            # the pause time, which was its end time.
                            duration_pause = (log["event_time"] - pause_start_time).seconds
                            video_interaction_id = course_learner_id + "_" + video_id + "_" + str(pause_start_time)
                            if duration_pause > 2 and duration_pause < 600:
                                if video_interaction_id in video_interaction_map:
                                    video_interaction_map[video_interaction_id]["times_pause"] = 1
                                    video_interaction_map[video_interaction_id]["duration_pause"] = duration_pause
                            pause_check = False
                        continue

                    # Everything below only matters while a video is playing.
                    if video_start_time == "":
                        continue

                    if log["event_time"] > video_start_time + datetime.timedelta(hours=0.5):
                        # No closing event within 30 minutes: abandon.
                        # NOTE(review): the seek/speed counters are not reset
                        # here, so they carry into the next session - confirm
                        # that this is intended.
                        video_start_time = ""
                        video_id = ""
                        final_time = log["event_time"]
                        continue

                    # 0. Seek
                    if log["event_type"] in seek_event_types and video_id == log["video_id"]:
                        # Forward seek event
                        if log["new_time"] > log["old_time"]:
                            times_forward_seek += 1
                            duration_forward_seek += log["new_time"] - log["old_time"]
                        # Backward seek event
                        if log["new_time"] < log["old_time"]:
                            times_backward_seek += 1
                            duration_backward_seek += log["old_time"] - log["new_time"]
                        continue

                    # 1. Speed change
                    if log["event_type"] == "speed_change_video" and video_id == log["video_id"]:
                        # Changes within 10 seconds of the previous one are
                        # not counted.
                        if speed_change_last_time == "" or (log["event_time"] - speed_change_last_time).seconds > 10:
                            if log["old_speed"] < log["new_speed"]:
                                times_speed_up += 1
                            if log["old_speed"] > log["new_speed"]:
                                times_speed_down += 1
                        speed_change_last_time = log["event_time"]
                        continue

                    # 2. Pause/Stop situation
                    # 3/4. Page changed/Session closed (any non-video event)
                    is_pause_or_stop = (log["event_type"] in pause_stop_event_types
                                        and video_id == log["video_id"])
                    if is_pause_or_stop or log["event_type"] not in video_event_types:
                        video_end_time = log["event_time"]
                        watch_duration = (video_end_time - video_start_time).seconds
                        video_interaction_id = course_learner_id + "_" + video_id + "_" + str(video_end_time)

                        if watch_duration > 5:
                            video_interaction_map[video_interaction_id] = {
                                "course_learner_id": course_learner_id,
                                "video_id": video_id,
                                "type": "video",
                                "watch_duration": watch_duration,
                                "times_forward_seek": times_forward_seek,
                                "duration_forward_seek": duration_forward_seek,
                                "times_backward_seek": times_backward_seek,
                                "duration_backward_seek": duration_backward_seek,
                                "times_speed_up": times_speed_up,
                                "times_speed_down": times_speed_down,
                                "start_time": video_start_time,
                                "end_time": video_end_time,
                            }

                        if is_pause_or_stop and log["event_type"] in pause_event_types:
                            pause_check = True
                            pause_start_time = video_end_time

                        # For video seek event
                        times_forward_seek = 0
                        duration_forward_seek = 0
                        times_backward_seek = 0
                        duration_backward_seek = 0

                        # For video speed change event
                        speed_change_last_time = ""
                        times_speed_up = 0
                        times_speed_down = 0

                        # For video general information
                        video_start_time = ""
                        video_id = ""
                        final_time = log["event_time"]

                # Keep only the events after the last finished session; they
                # are replayed together with the next file.
                if final_time != "":
                    updated_learner_video_event_logs[course_learner_id] = [
                        log for log in event_logs
                        if log["event_time"] > final_time
                    ]

        current_date = getNextDay(current_date)

    # Video_interaction table
    # Database version
    sql = ("insert into video_interaction(interaction_id, course_learner_id, video_id, duration, "
           "times_forward_seek, duration_forward_seek, times_backward_seek, duration_backward_seek, "
           "times_speed_up, times_speed_down, times_pause, duration_pause, start_time, end_time) "
           "values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    for interaction_id in video_interaction_map.keys():
        interaction = video_interaction_map[interaction_id]

        # Pause information exists only when a matching play event followed.
        if "times_pause" in interaction:
            times_pause = interaction["times_pause"]
            duration_pause = interaction["duration_pause"]
        else:
            times_pause = 0
            duration_pause = 0

        data = (interaction_id,
                interaction["course_learner_id"],
                interaction["video_id"],
                process_null(interaction["watch_duration"]),
                process_null(interaction["times_forward_seek"]),
                process_null(interaction["duration_forward_seek"]),
                process_null(interaction["times_backward_seek"]),
                process_null(interaction["duration_backward_seek"]),
                process_null(interaction["times_speed_up"]),
                process_null(interaction["times_speed_down"]),
                process_null(times_pause),
                process_null(duration_pause),
                interaction["start_time"],
                interaction["end_time"])
        cursor.execute(sql, data)
def main(argv):
    """Translate the daily edX logs of every course folder into the database.

    The configuration file named by ``argv[0]`` supplies the data root
    (``data.path``), the update lists/maps and the MySQL credentials.  The
    data root is concatenated directly with course folder names, so it is
    expected to end with a path separator.

    Work flow:

    1. Remove the existing log, metadata and survey records of the courses
       listed in the configuration.
    2. For every course folder in the data root (everything except
       ``daily_logs``): load or create its ``course_processing_tracker``
       file, run learner/survey mode when requested, then for every course
       date not processed yet filter the matching
       ``daily_logs/delftx-edx-events-<date>.log.gz`` into
       ``<course>/unzip_daily_logs/`` and run the per-day translation steps.
    3. Persist the tracker when new dates were processed and, when
       ``remove_filtered_logs`` is ``"1"``, delete the course's filtered
       (decompressed) log files.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the configuration path.
    """
    # Read configs
    config = ConfigParser.ConfigParser()
    config.read(argv[0])

    # All the configs are read as string
    course_log_path = config.get("data", "path")
    remove_filtered_logs = config.get("data", "remove_filtered_logs")
    log_update_list = json.loads(config.get("data", "log_update_list"))
    metadata_update_list = json.loads(config.get("data", "metadata_update_list"))
    survey_update_map = json.loads(config.get("data", "survey_update_map"))

    user = config.get("mysqld", "user")
    password = config.get("mysqld", "password")
    host = config.get("mysqld", "host")
    database = config.get("mysqld", "database")

    # Database
    # NOTE(review): no commit() or close() on this connection is visible in
    # this function - confirm that autocommit is enabled or that the commit
    # happens elsewhere.
    connection = mysql.connector.connect(user=user, password=password, host=host, database=database, charset='utf8mb4')
    cursor = connection.cursor()

    # Delete relevant records before updating the database
    print("Removing log records...")
    for course_code in log_update_list:
        print(str("\t" + course_code))
        RemoveCourseRecords(course_log_path, course_code, "log", cursor)

    print("Removing metadata records...")
    for course_code in metadata_update_list:
        print(str("\t" + course_code))
        RemoveCourseRecords(course_log_path, course_code, "metadata", cursor)

    print("Removing survey records...")
    for course_code in survey_update_map:
        print(str("\t" + course_code))
        RemoveCourseRecords(course_log_path, course_code, "survey", cursor)

    print("")

    daily_logs_dir = str(course_log_path + "/daily_logs/")

    for folder in os.listdir(course_log_path):
        # "daily_logs" holds the shared compressed logs, not a course.
        if folder == "daily_logs":
            continue
        # Stray files such as ".DS_Store" (Mac OS) are not courses either.
        if folder == ".DS_Store" or not os.path.isdir(os.path.join(course_log_path, folder)):
            continue

        course_code = folder
        print("Processing\t" + course_code)

        # A file named "course_processing_tracker" (JSON format) is created
        # for each course to keep track of the processing files
        tracker_path = str(course_log_path + course_code + "/course_processing_tracker")
        if not os.path.exists(tracker_path):
            tracker_map = {}
            # This value is used to keep track of the processing status for
            # the course' daily log files, i.e., "False" (not finished yet)
            # and "True" (finished)
            tracker_map["status"] = False
            tracker_map["processed_dates"] = []
            tracker_map["num_processed_dates"] = 0
            with open(tracker_path, "w") as output_file:
                output_file.write(json.dumps(tracker_map))

        # Read the "course_processing_tracker" file
        with open(tracker_path, "r") as input_file:
            tracker_map = json.loads(input_file.read())

        metadata_path = str(course_log_path + course_code + "/metadata/")

        # Determine whether the course_structure file is present
        has_course_structure = False
        for metadata_file in os.listdir(metadata_path):
            if "course_structure" in metadata_file:
                has_course_structure = True
                break
        if not has_course_structure:
            print("The course structure file is missing.\n")
            continue

        # Learner mode
        if course_code in metadata_update_list:
            print("Learner Mode processing...")
            learner_mode(metadata_path, course_code, cursor)

        # Survey mode
        survey_path = str(course_log_path + course_code + "/surveys/")
        if course_code in survey_update_map:
            print("Survey Mode processing...")
            pre_id_index = int(survey_update_map[course_code][0])
            post_id_index = int(survey_update_map[course_code][1])
            survey_mode(metadata_path, survey_path, cursor, pre_id_index, post_id_index)

        # Every date of this course has already been processed.
        if tracker_map["status"]:
            print("")
            continue

        # Retrieve the start/end date of the course
        course_metadata_map = ExtractCourseInformation(metadata_path)
        course_id = course_metadata_map["course_id"]
        start_date = course_metadata_map["start_date"]
        end_date = course_metadata_map["end_date"]

        unzip_file_path = str(course_log_path + course_code + "/unzip_daily_logs/")

        current_date = start_date
        while current_date <= end_date:
            current_date_string = str(current_date)[0:10]
            daily_log_file = str("delftx-edx-events-" + current_date_string + ".log.gz")
            compressed_log_path = str(daily_logs_dir + daily_log_file)

            if (current_date_string not in tracker_map["processed_dates"]
                    and os.path.exists(compressed_log_path)):
                print(daily_log_file)

                # Decompress log files, keeping only this course's events
                if not os.path.exists(unzip_file_path):
                    os.mkdir(unzip_file_path)
                output_path = str(unzip_file_path + daily_log_file[0:-3])
                if not os.path.exists(output_path):
                    with open(output_path, 'w') as output_file:
                        with gzip.open(compressed_log_path, 'r') as f:
                            for line in f:
                                jsonObject = json.loads(line)
                                if course_id in jsonObject["context"]["course_id"]:
                                    output_file.write(line)

                daily_log_path = output_path

                # NOTE(review): the calls below pass a different number of
                # arguments than the three-parameter video_interaction,
                # quiz_mode and quiz_sessions definitions visible in this
                # file; presumably daily-log variants are in scope - confirm.

                # 1. Video_interaction table
                remaining_video_interaction_log_path = course_log_path + course_code + "/remaining_video_interaction_logs"
                video_interaction(metadata_path, daily_log_path, remaining_video_interaction_log_path, cursor)

                # 2. Quiz mode
                quiz_mode(daily_log_path, cursor)

                # 3. Quiz_sessions table
                remaining_quiz_session_log_path = course_log_path + course_code + "/remaining_quiz_session_logs"
                quiz_sessions(metadata_path, daily_log_path, remaining_quiz_session_log_path, cursor)

                # 4. Forum_interaction table
                forum_interaction(metadata_path, daily_log_path, cursor)

                # 5. Forum_sessions table
                remaining_forum_session_log_path = course_log_path + course_code + "/remaining_forum_session_logs"
                forum_sessions(metadata_path, daily_log_path, remaining_forum_session_log_path, cursor)

                # 6. Sessions table
                remaining_session_log_path = course_log_path + course_code + "/remaining_session_logs"
                sessions(metadata_path, daily_log_path, remaining_session_log_path, cursor)

                tracker_map["processed_dates"].append(current_date_string)

            current_date = getNextDay(current_date)

        # The course is finished once every date of its run was processed.
        if len(tracker_map["processed_dates"]) == getDayDiff(start_date, end_date) + 1:
            tracker_map["status"] = True

        # Rewrite the tracker only when new dates were processed.
        if tracker_map["num_processed_dates"] != len(tracker_map["processed_dates"]):
            tracker_map["num_processed_dates"] = len(tracker_map["processed_dates"])
            with open(tracker_path, "w") as output_file:
                output_file.write(json.dumps(tracker_map))

        # Delete the decompressed files.  Only this course's filtered copies
        # are removed; the shared compressed files in "daily_logs" are the
        # input of every other course and of later runs and must be kept.
        if remove_filtered_logs == "1" and os.path.isdir(unzip_file_path):
            for log_file in os.listdir(unzip_file_path):
                os.remove(str(unzip_file_path + log_file))

        print("")