Example #1
0
def get_user_task_status(user, escape=False):
    """Return a dict mapping each task to its status for ``user``.

    Rows are read from the ``status`` table through ``user_db.select``,
    filtered on the given user.  When ``escape`` is true, each task name is
    passed through ``io_extra.html_escape`` before being used as a key; the
    status values are stored unchanged.
    """
    rows = user_db.select(u'status', dict(user=user), where=u'user = $user')
    status_by_task = {}
    for row in rows:
        task_name = row['task']
        task_status = row['status']
        if escape:
            task_name = io_extra.html_escape(task_name)
        # Later rows for the same task overwrite earlier ones
        status_by_task[task_name] = task_status
    return status_by_task
Example #2
0
def _text_after(entry):
    """Return the sentence text that exists once ``entry`` has been applied.

    ``entry`` is ``[left, diff, right, op, opclass, time]``.  The start entry
    (op ``-1``) carries the whole text in ``diff``; after an insert the text
    is ``left + diff + right``; after a delete the removed ``diff`` is dropped.
    """
    op = entry[3]
    if op == -1:
        return entry[1]
    if op == track.INS:
        return u''.join(entry[:3])
    if op == track.DEL:
        return u''.join((entry[0], entry[2]))
    io_extra.log('Unknown edit op, using emptry string')
    return u''


def _elapsed(later, earlier):
    """Return ``long(later) - long(earlier)``, or None if either is not numeric.

    Timestamps are stored as strings and are the empty string when no time
    was ever recorded, so the conversion can legitimately fail.
    """
    try:
        return long(later) - long(earlier)
    except (TypeError, ValueError):
        return None


def _sentence_pause_stats(sent_edits, user, sent_no):
    """Compute the pause statistics of one sentence.

    ``sent_edits`` is ``[start, edit, ..., end]`` with entries laid out as
    ``[left, diff, right, op, opclass, time]``.  ``user`` and ``sent_no`` are
    only used in warning messages.

    Returns the tuple ``(initial, final, short, medium, long, total,
    pause_time, words, apr, pwr)``.  Any gap whose timestamps cannot be
    parsed is logged and counted as 0.
    """
    pause = {'s': 0, 'm': 0, 'l': 0, 't': 0}

    def mark_pause(p):
        # Gaps below PAUSE_SHORT are not pauses at all
        if p < PAUSE_SHORT:
            return
        if p >= PAUSE_LONG:
            pause['l'] += 1
        elif p >= PAUSE_MEDIUM:
            pause['m'] += 1
        else:
            pause['s'] += 1
        pause['t'] += p

    def gap(later, earlier, what):
        delta = _elapsed(sent_edits[later][5], sent_edits[earlier][5])
        if delta is None:
            io_extra.log(u'Warning: cannot compute {}, setting to 0 for ({}, {})'.format(what, user, sent_no))
            return 0
        return delta

    # Initial pause: start -> first edit (or start -> end if never edited)
    ip = gap(1, 0, 'initial pause')
    mark_pause(ip)
    fp = 0
    # If edited
    if len(sent_edits) > 2:
        for k in range(2, len(sent_edits) - 1):
            mark_pause(gap(k, k - 1, 'pause'))
        # Final pause: last edit -> end.  The end time is empty when no
        # closing event was recorded, so this must be guarded as well.
        fp = gap(-1, -2, 'final pause')
        mark_pause(fp)
    total = gap(-1, 0, 'total')
    words = len(sent_edits[-1][1].split())
    allp = pause['s'] + pause['m'] + pause['l']
    # Average pause ratio
    try:
        apr = (float(pause['t']) / allp) / (float(total) / words)
    except ZeroDivisionError:
        # No pauses, no words or no elapsed time
        apr = 0
    # Pause to word ratio (0 for an empty final translation)
    pwr = float(allp) / words if words else 0
    return (ip, fp, pause['s'], pause['m'], pause['l'], total, pause['t'], words, apr, pwr)


def report_edits_pauses(t_dir, r_dir):
    """Write per-user edit and pause reports for the task stored in ``t_dir``.

    Reads ``config.txt`` and ``data.db`` from ``t_dir``.  For every user whose
    status is 'finished', the edits are replayed on top of the base
    translations and three files are written into ``r_dir``:

    * ``edits.<user>.csv``  -- tab-separated trace of every intermediate state
    * ``edits.<user>.html`` -- the same trace rendered via ``render.report_edits``
    * ``pause.<user>.csv``  -- tab-separated pause statistics per sentence

    This is Python 2 code (it relies on ``long`` and ``unicode``).
    """
    users = []  # Users in order
    user_dict = {}  # user_dict[user] = user_i
    user_trans = []  # user_trans[user_i][sent_i] = [inter1, inter2, ...]
                     # where intermediate is [left, diff, right, op, opclass, time]
    base_trans = collections.defaultdict(dict)  # base_trans[user][sent] = translation before editing

    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']

    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    try:
        c = conn.cursor()

        # Find users who finished this task
        for (user_i, (u,)) in enumerate(c.execute('''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC''')):
            users.append(u)
            user_dict[u] = user_i

        # Populate base translations (mt) for each user
        for user in users:
            for (sent, text) in c.execute('''SELECT sent, text FROM mt WHERE user=? ORDER BY sent ASC''', ('STATIC' if task == track.REALTIME_STATIC else user,)):
                base_trans[user][sent] = text
            # Each user starts with a list of intermediates, starting with the base translation
            user_trans.append([[['', t, '', -1, 'start', '']] for (_sent, t) in sorted(base_trans[user].items())])

        # Trace edits, annotate with types, times
        for (user, sent, caret, op, diff, stamp) in c.execute('''SELECT user, sent, caret, op, input, time FROM edits ORDER BY user ASC, sent ASC, time ASC'''):
            user_i = user_dict.get(user)
            if user_i is None:
                continue
            # Only count inserts and deletes
            if op == track.INS:
                opclass = u'ins'
            elif op == track.DEL:
                opclass = u'del'
            else:
                continue
            history = user_trans[user_i][sent - 1]
            prev = _text_after(history[-1])
            left = prev[:caret]
            # For deletes, diff overlaps prev, so cut out
            right = prev[caret + len(diff):] if op == track.DEL else prev[caret:]
            history.append([left, diff, right, op, opclass, unicode(stamp)])

        # Final outputs: close every sentence with the text after its last edit
        for trans in user_trans:
            for history in trans:
                history.append(['', _text_after(history[-1]), '', -1, 'end', ''])

        # Pull initial and final focus/blur times.  The previous row is
        # tracked as (user, sentence): comparing the sentence index alone
        # would treat a new user's first event as a continuation whenever
        # the preceding user's last event was on the same sentence.
        prev_key = None
        for (user, sent, stamp) in c.execute('''SELECT user, sent, time FROM events ORDER BY user ASC, sent ASC, time ASC'''):
            user_i = user_dict.get(user)
            if user_i is None:
                continue
            sent_i = sent - 1
            key = (user_i, sent_i)
            if key != prev_key:
                # Initial focus
                user_trans[user_i][sent_i][0][5] = str(stamp)
            else:
                # overwrite with every following record
                user_trans[user_i][sent_i][-1][5] = str(stamp)
            prev_key = key
    finally:
        # Everything needed is in memory now; also releases the cursor
        conn.close()

    # Write user-specific reports (CSV)
    for (user_i, u) in enumerate(users):
        with codecs.open(os.path.join(r_dir, 'edits.' + u + '.csv'), mode='wb', encoding='UTF-8') as csv_out:
            csv_out.write(u'\t'.join(('Time', 'Operation', 'Left', 'Edit', 'Right')) + u'\n')
            for sent_edits in user_trans[user_i]:
                for edit in sent_edits:
                    csv_out.write(u'\t'.join((edit[5], edit[4], edit[0], edit[1], edit[2])) + u'\n')
                # "empty" line
                csv_out.write(u'\t'.join(('', '', '', '', '')) + u'\n')

    # Escape everything (must happen after the CSV trace, before the HTML)
    for trans in user_trans:
        for sent_edits in trans:
            for edit in sent_edits:
                edit[0] = io_extra.html_escape(edit[0])
                edit[1] = io_extra.html_escape(edit[1])
                edit[2] = io_extra.html_escape(edit[2])

    # Write user-specific reports (HTML)
    for (user_i, u) in enumerate(users):
        # Write out report using template
        with codecs.open(os.path.join(r_dir, 'edits.' + u + '.html'), mode='wb', encoding='UTF-8') as html_out:
            html_out.write(unicode(render.report_edits(user_trans[user_i])))

    # Write user-specific pause reports (CSV only)
    for (user_i, u) in enumerate(users):
        with codecs.open(os.path.join(r_dir, 'pause.' + u + '.csv'), mode='wb', encoding='UTF-8') as csv_out:
            csv_out.write(u'\t'.join(('ID', 'Initial', 'Final', 'Short', 'Medium', 'Long', 'Total Time', 'Pause Time', 'Words', 'APR', 'PWR')) + u'\n')
            for (j, sent_edits) in enumerate(user_trans[user_i]):
                row = (j + 1,) + _sentence_pause_stats(sent_edits, u, j + 1)
                csv_out.write(u'\t'.join(str(n) for n in row) + u'\n')