def get_user_task_status(user, escape=False):
    """Return a dict mapping each task recorded for ``user`` to its status.

    Rows are read from the ``status`` table of ``user_db`` where the ``user``
    column matches ``user``.

    Args:
        user: value matched against the ``user`` column.
        escape: when true, task names (the dict keys) are passed through
            ``io_extra.html_escape``; status values are left untouched.

    Returns:
        dict of ``{task: status}``; a later row for the same task replaces
        an earlier one.
    """
    rows = user_db.select(u'status', {'user': user}, where=u'user = $user')
    statuses = {}
    for row in rows:
        task = row['task']
        if escape:
            task = io_extra.html_escape(task)
        statuses[task] = row['status']
    return statuses
def _text_after_edit(edit):
    """Return the full sentence text as it stands once ``edit`` is applied.

    ``edit`` is ``[left, diff, right, op, opclass, time]``.  For the starting
    entry (op ``-1``) and for inserts the text is ``left + diff + right``;
    for deletes ``diff`` is the removed text, so the result is
    ``left + right``.
    """
    op = edit[3]
    if op in (-1, track.INS):
        return u''.join(edit[:3])
    if op == track.DEL:
        return u''.join((edit[0], edit[2]))
    # Only -1 / INS / DEL entries are ever stored, so this is a safety net.
    io_extra.log('Unknown edit op, using emptry string')
    return u''


def _pause_row(sent_edits, user, sent_no):
    """Compute one row of the pause report for a single sentence.

    ``sent_edits`` is the sentence's list of edit entries, beginning with the
    'start' entry and ending with the 'end' entry; index 5 of each entry is
    its timestamp as a string ('' when unknown).

    Returns the tuple ``(sent_no, initial, final, short, medium, long,
    total time, pause time, words, APR, PWR)``.
    """
    tally = {'s': 0, 'm': 0, 'l': 0, 't': 0}

    def mark_pause(gap):
        # Gaps below PAUSE_SHORT are not pauses and are not counted at all.
        if gap < PAUSE_SHORT:
            return
        if gap >= PAUSE_LONG:
            tally['l'] += 1
        elif gap >= PAUSE_MEDIUM:
            tally['m'] += 1
        else:
            tally['s'] += 1
        tally['t'] += gap

    def gap_or_zero(later, earlier, what):
        # Start/end timestamps stay '' when no matching event row was
        # recorded; report that and fall back to 0 instead of aborting.
        try:
            return long(later[5]) - long(earlier[5])
        except (TypeError, ValueError):
            io_extra.log(
                u'Warning: cannot compute {}, setting to 0 for ({}, {})'.format(
                    what, user, sent_no))
            return 0

    # Initial pause: from the start entry to whatever entry follows it
    ip = gap_or_zero(sent_edits[1], sent_edits[0], 'initial pause')
    mark_pause(ip)

    fp = 0
    # If edited (there is at least one entry between start and end)
    if len(sent_edits) > 2:
        # Gaps between consecutive edits (start and end entries excluded)
        for earlier, later in zip(sent_edits[1:-2], sent_edits[2:-1]):
            mark_pause(long(later[5]) - long(earlier[5]))
        # Final pause: from the last edit to the end entry
        fp = gap_or_zero(sent_edits[-1], sent_edits[-2], 'final pause')
        mark_pause(fp)

    total = gap_or_zero(sent_edits[-1], sent_edits[0], 'total')
    words = len(sent_edits[-1][1].split())
    allp = tally['s'] + tally['m'] + tally['l']

    # Average pause ratio: mean pause length over mean time per word
    try:
        apr = (float(tally['t']) / allp) / (float(total) / words)
    except ZeroDivisionError:
        # No pauses, no words, or zero total time
        apr = 0
    # Pause to word ratio; an empty final translation has no words
    pwr = float(allp) / words if words else 0

    return (sent_no, ip, fp, tally['s'], tally['m'], tally['l'],
            total, tally['t'], words, apr, pwr)


def report_edits_pauses(t_dir, r_dir):
    """Write per-user edit and pause reports for one task.

    Reads ``config.txt`` and ``data.db`` from ``t_dir``.  For every user whose
    status is 'finished', the edit history of each sentence is replayed from
    the ``mt``, ``edits`` and ``events`` tables and three files are written
    to ``r_dir``:

    * ``edits.<user>.csv``  - tab-separated trace of every edit,
    * ``edits.<user>.html`` - the same trace rendered by
      ``render.report_edits`` (text fields HTML-escaped),
    * ``pause.<user>.csv``  - tab-separated pause statistics per sentence.

    Args:
        t_dir: task directory containing ``config.txt`` and ``data.db``.
        r_dir: directory the report files are written into.
    """
    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']

    # user_trans[user_i][sent_i] = [inter1, inter2, ...]
    # where each intermediate is [left, diff, right, op, opclass, time]
    user_trans = []

    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    try:
        c = conn.cursor()

        # Find users who finished this task (in order)
        users = [u for (u,) in c.execute(
            '''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC''')]
        user_dict = dict((u, i) for (i, u) in enumerate(users))

        # Populate base translations (mt) for each user
        for user in users:
            mt_user = 'STATIC' if task == track.REALTIME_STATIC else user
            base = {}
            for (sent, text) in c.execute(
                    '''SELECT sent, text FROM mt WHERE user=? ORDER BY sent ASC''',
                    (mt_user,)):
                base[sent] = text
            # Each sentence starts with a single 'start' intermediate
            # holding the base translation
            user_trans.append([[['', text, '', -1, 'start', '']]
                               for (_, text) in sorted(base.items())])

        # Trace edits, annotate with types, times
        for (user, sent, caret, op, diff, stamp) in c.execute(
                '''SELECT user, sent, caret, op, input, time FROM edits ORDER BY user ASC, sent ASC, time ASC'''):
            if user not in user_dict:
                continue
            # Only count inserts and deletes
            if op == track.INS:
                opclass = u'ins'
            elif op == track.DEL:
                opclass = u'del'
            else:
                continue
            history = user_trans[user_dict[user]][sent - 1]
            prev = _text_after_edit(history[-1])
            left = prev[:caret]
            # For deletes, diff overlaps prev, so cut it out
            right = prev[caret + len(diff):] if op == track.DEL else prev[caret:]
            history.append([left, diff, right, op, opclass, unicode(stamp)])

        # Final outputs: close every history with an 'end' entry
        for trans in user_trans:
            for history in trans:
                history.append(['', _text_after_edit(history[-1]), '', -1, 'end', ''])

        # Pull initial and final focus/blur times.  The first event of each
        # (user, sentence) stamps the 'start' entry; every later event for
        # the same pair overwrites the 'end' entry, so the last one wins.
        # The key includes the user so that two users whose consecutive
        # rows share a sentence index are not mistaken for one sentence.
        prev_key = None
        for (user, sent, stamp) in c.execute(
                '''SELECT user, sent, time FROM events ORDER BY user ASC, sent ASC, time ASC'''):
            if user not in user_dict:
                continue
            key = (user_dict[user], sent - 1)
            history = user_trans[key[0]][key[1]]
            if key != prev_key:
                history[0][5] = str(stamp)
            else:
                history[-1][5] = str(stamp)
            prev_key = key
    finally:
        # Closing the connection also releases its cursor
        conn.close()

    # Write user-specific reports (CSV); done before escaping so the CSV
    # carries the raw text
    for (u, trans) in zip(users, user_trans):
        path = os.path.join(r_dir, 'edits.' + u + '.csv')
        with codecs.open(path, mode='wb', encoding='UTF-8') as csv_out:
            csv_out.write(u'\t'.join(('Time', 'Operation', 'Left', 'Edit', 'Right')) + u'\n')
            for sent_edits in trans:
                for edit in sent_edits:
                    csv_out.write(
                        u'\t'.join((edit[5], edit[4], edit[0], edit[1], edit[2])) + u'\n')
                # "empty" line between sentences
                csv_out.write(u'\t'.join(('', '', '', '', '')) + u'\n')

    # Escape everything (in place) for the HTML report
    for trans in user_trans:
        for sent_edits in trans:
            for edit in sent_edits:
                edit[0] = io_extra.html_escape(edit[0])
                edit[1] = io_extra.html_escape(edit[1])
                edit[2] = io_extra.html_escape(edit[2])

    # Write user-specific reports (HTML) using the template
    for (u, trans) in zip(users, user_trans):
        path = os.path.join(r_dir, 'edits.' + u + '.html')
        with codecs.open(path, mode='wb', encoding='UTF-8') as html_out:
            html_out.write(unicode(render.report_edits(trans)))

    # Write user-specific pause reports (CSV only)
    for (u, trans) in zip(users, user_trans):
        path = os.path.join(r_dir, 'pause.' + u + '.csv')
        with codecs.open(path, mode='wb', encoding='UTF-8') as csv_out:
            csv_out.write(u'\t'.join(('ID', 'Initial', 'Final', 'Short', 'Medium', 'Long',
                                      'Total Time', 'Pause Time', 'Words', 'APR', 'PWR')) + u'\n')
            for (j, sent_edits) in enumerate(trans):
                row = _pause_row(sent_edits, u, j + 1)
                csv_out.write(u'\t'.join(str(n) for n in row) + u'\n')