def write_dropout_time(click_path, lect_path, outpath_hour):
    """Write a CSV of users who never opened a second-half lecture.

    Valid lectures (per ``Lecture.is_valid_lecture``) are ordered by their
    integer ``open_time``; the "second half" is every lecture from index
    ``(n + 1) // 2`` onwards.  A user is reported as dropped out when the
    click log shows at least one lecture page for them but none that belongs
    to the second half.  Users who never hit a lecture URL are not reported.

    The value written for each reported user is the largest
    ``timestamp // 1000`` seen across *all* of that user's log entries
    (lecture pages or not).

    Args:
        click_path: passed unchanged to ``Clickstream.logs``.
        lect_path: passed unchanged to ``Lecture.lectures``.
        outpath_hour: path of the CSV file to create or overwrite.  It gets
            a ``username,dropout_time`` header followed by one row per
            dropped-out user.

    Returns:
        None.
    """
    lectures = [
        lect for lect in Lecture.lectures(lect_path)
        if Lecture.is_valid_lecture(lect)
    ]
    lectures.sort(key=lambda lect: int(lect['open_time']))

    # Floor division keeps the slice index an int on Python 2 and Python 3.
    # With an odd number of lectures the middle one counts as first half.
    second_half_start = (len(lectures) + 1) // 2
    # NOTE(review): ids extracted from URLs below are strings, so this only
    # matches if lect['id'] is a string as well -- confirm against Lecture.
    second_half_lects = set(
        lect['id'] for lect in lectures[second_half_start:]
    )

    # Compiled once, outside the per-log loop.
    query_id_re = re.compile(r'lecture_id=(\d+)')
    path_id_re = re.compile(r'lecture/(\d+)')

    user_lects = defaultdict(set)
    user_last_access = defaultdict(int)
    for log in Clickstream.logs(click_path):
        username = log['username']

        # Presumably milliseconds -> seconds; assumes integer timestamps, for
        # which `//` equals the original Python 2 `/` -- TODO confirm.
        seconds = log['timestamp'] // 1000
        if seconds > user_last_access[username]:
            user_last_access[username] = seconds

        # Prefer the query-string form and fall back to the path form only
        # when the former is absent (match objects are always truthy).
        page_url = log['page_url']
        match = query_id_re.search(page_url) or path_id_re.search(page_url)
        if match is None:
            continue
        user_lects[username].add(match.group(1))

    dropout_time = dict(
        (username, user_last_access[username])
        for username, lects in user_lects.items()
        if not (lects & second_half_lects)
    )

    # The context manager closes the file even if writing a row raises.
    with open(outpath_hour, 'w') as outfile:
        out_csv = csv.writer(outfile)
        out_csv.writerow(['username', 'dropout_time'])
        out_csv.writerows(
            [username, last_seen]
            for username, last_seen in dropout_time.items()
        )
demo_csv = csv.reader(open(demo_path)) demo_csv.next() raw_demo_header = demo_csv.next() demo_header = [] demo_start, demo_end = (20,56) for h in raw_demo_header[demo_start:(demo_end+1)]: demo_header.append(re.sub(".*-", "", h).strip()) print demo_header demo_map = dict() for row in demo_csv: demo_map[row[2]] = [ 1 if r=="TRUE" else 0 for r in row[demo_start:(demo_end+1)] ] action_cnt = defaultdict(int) user_seqs = defaultdict(list) for lect in Lecture.lecture_submissions(db_path): user_seqs[lect['session_user_id']].append((lect['submission_time'],"L"+lect['item_id'],"0")) # include both 'view' and 'download' action_cnt["L"+lect['item_id']] += 1 for quiz in Quiz.quiz_submissions(db_path): user_seqs[quiz['session_user_id']].append((quiz['submission_time'],"Q"+quiz['item_id'],(0 if quiz['raw_score']=="NULL" else quiz['raw_score']))) action_cnt["Q"+quiz['item_id']] += 1 # quiz_submission = dict() # for log in Clickstream.logs(in_path): # action = re.sub("/$","", re.sub("https://class.coursera.org/algebra-001/", "", log["page_url"])) # # if not re.search("^(lecture|signature|quiz)", action): continue # action_refined = None #