def main(argv):
    """Start the TransCenter Live server.

    Configures UTF-8 console I/O, parses command-line flags, starts the
    realtime translation hub and the database-writer thread, runs the
    web.py app (blocking), then shuts everything down cleanly.
    """
    global dbg_editor, dbg_sql, dbg_track, hub, off_hub
    # Unicode I/O: wrap the standard streams so all console traffic is UTF-8
    # (Python 2 streams are byte-oriented by default).
    sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
    sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
    sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
    if len(argv[1:]) > 0 and argv[1] in ('-h', '--help'):
        print >> sys.stderr, u'TransCenter Live Server'
        print >> sys.stderr, u'Usage: {0} [-p port=8080] [-t timeout=600] [--dbg-editor] [--dbg-sql] [--dbg-track] [--dbg-webpy]'.format(argv[0])
        # BUGFIX: help text said 'crtl+c'
        print >> sys.stderr, u'(ctrl+c to stop server)'
        sys.exit(2)
    opts, argv = getopt.getopt(argv[1:], 'p:t:', ['dbg-editor', 'dbg-sql', 'dbg-track', 'dbg-webpy'])
    # web.py reads its port from sys.argv, so rebuild it with only the
    # program name (the port is re-appended below if -p was given).
    sys.argv = [sys.argv[0]]
    timeout = 600
    for o, a in opts:
        if o == '-p':
            sys.argv.append(a)  # forwarded to web.py
        elif o == '-t':
            timeout = int(a)
        elif o == '--dbg-editor':
            dbg_editor = True
        elif o == '--dbg-sql':
            dbg_sql = True
            user_db.printing = dbg_sql
        elif o == '--dbg-track':
            dbg_track = True
        elif o == '--dbg-webpy':
            web.config['debug'] = True
    # Realtime hub (disabled entirely in editor-debug mode).
    io_extra.log(u'STATUS: starting Realtime hub...')
    hub = rt_hub.RealtimeHub(trans_db, db_write_lock, timeout=timeout) if not dbg_editor else None
    # off_hub[task][source] = target
    off_hub = {}
    # Init translators
    # TODO: change to on-demand/timeout
    for task in sorted(f for f in os.listdir(io_extra.TASK_DIR) if f.endswith('.data')):
        config = io_extra.read_cfg(os.path.join(io_extra.TASK_DIR, task, 'config.txt'))
        if config['task'] in (track.REALTIME, track.REALTIME_STATIC):
            realtime_d = config['config']
            # BUGFIX: hub is None under --dbg-editor; previously this call
            # raised AttributeError whenever a realtime task existed.
            if hub is not None:
                hub.manual_start(realtime_d)
        # Load offline data
        elif config['task'] == 'offline':
            off_hub[task] = dict(
                (s.strip(), t.strip())
                for (s, t) in itertools.izip(
                    codecs.open(os.path.join(io_extra.TASK_DIR, task, 'source.txt'), 'rb', 'UTF-8'),
                    codecs.open(os.path.join(io_extra.TASK_DIR, task, 'target.txt'), 'rb', 'UTF-8')))
    io_extra.log(u'STATUS: Realtime hub started.')
    # Database writer: background thread that drains ev_queue/ed_queue to the DB.
    writer = threading.Thread(target=io_extra.run_database_writer,
                              args=(trans_db, db_write_lock, ev_queue, ed_queue, queue_lock))
    writer.start()
    io_extra.log(u'STATUS: Database writing started.')
    # Start web app (blocks until the server is stopped).
    io_extra.log(u'STATUS: Main webpy app starting.')
    app.run()
    # Cleanup: the None sentinel tells the writer thread to finish and exit.
    ev_queue.append(None)
    if not dbg_editor:
        hub.close()
        io_extra.log(u'STATUS: Realtime hub closed.')
    writer.join()
    io_extra.log(u'STATUS: All database writes finished.')
    session_store.cleanup(0)
    io_extra.log(u'STATUS: Ready to shutdown.')
def reload_groups():
    """Reload the global group map from groups.txt.

    The file stores entries as val: key, so the mapping is inverted here
    to allow value -> key lookups.
    """
    global groups
    raw = io_extra.read_cfg(os.path.join(io_extra.DATA_DIR, 'groups.txt'))
    groups = {v: k for (k, v) in raw.iteritems()}
def report_edits_pauses(t_dir, r_dir):
    """Generate per-user edit traces and pause reports for one task.

    Reads the task's SQLite database in t_dir and writes into r_dir:
      - edits.<user>.csv  : every intermediate edit state, tab-separated
      - edits.<user>.html : the same trace rendered via the template
      - pause.<user>.csv  : per-sentence pause counts and ratios
    """
    users = []       # Users in order
    user_dict = {}   # user_dict[user] = user_i
    user_trans = []  # user_trans[user_i][sent_i] = [inter1, inter2, ...]
                     # where an intermediate is [left, diff, right, op, type, time]
    base_trans = collections.defaultdict(dict)  # base_trans[user][sent] = translation before editing
    # Get the directory name / database entry for this task
    task_id = os.path.basename(os.path.abspath(t_dir))
    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']
    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    c = conn.cursor()
    # Find users who finished this task
    for (i, (u,)) in enumerate(c.execute('''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC''')):
        users.append(u)
        user_dict[u] = i
    # Populate base translations (mt) for each user
    for user in users:
        # Static realtime tasks store one shared MT output under 'STATIC'.
        mt_user = 'STATIC' if task == track.REALTIME_STATIC else user
        for (sent, text) in c.execute('''SELECT sent, text FROM mt WHERE user=? ORDER BY sent ASC''', (mt_user,)):
            base_trans[user][sent] = text
        # Each user starts with a list of intermediates, beginning with the base translation
        user_trans.append([[['', t, '', -1, 'start', '']]
                           for (_, t) in sorted(base_trans[user].iteritems())])
    # Read user edits
    res = c.execute('''SELECT user, sent, caret, op, input, time FROM edits ORDER BY user ASC, sent ASC, time ASC''')
    # Trace edits, annotate with types, times
    for (user, sent, caret, op, diff, time) in res:
        user_i = user_dict.get(user, -1)
        if user_i == -1:
            continue
        sent_i = sent - 1
        last = user_trans[user_i][sent_i][-1]
        # Reconstruct the previous full text from the last intermediate.
        if last[3] in (-1, track.INS):
            prev = u''.join(last[:3])
        elif last[3] == track.DEL:
            prev = u''.join((last[0], last[2]))
        else:
            # BUGFIX: message said 'emptry'
            io_extra.log('Unknown edit op, using empty string')
            prev = u''
        left = prev[:caret]
        # For deletes, diff overlaps prev, so cut it out of the right side
        right = prev[caret + len(diff):] if op == track.DEL else prev[caret:]
        if op == track.INS:
            opclass = u'ins'
        elif op == track.DEL:
            opclass = u'del'
        else:
            # Only count inserts and deletes
            continue
        user_trans[user_i][sent_i].append([left, diff, right, op, opclass, unicode(time)])
    # Final outputs: append the final state of each sentence as an 'end' marker
    for trans in user_trans:
        for sent in trans:
            op = sent[-1][3]
            if op == -1:
                prev = sent[-1][1]
            elif op == track.INS:
                prev = u''.join(sent[-1][:3])
            else:
                prev = u''.join((sent[-1][0], sent[-1][2]))
            sent.append(['', prev, '', -1, 'end', ''])
    # Pull initial and final focus/blur times
    res = c.execute('''SELECT user, sent, time FROM events ORDER BY user ASC, sent ASC, time ASC''')
    last_key = None
    for r in res:
        user_i = user_dict.get(r[0], -1)
        if user_i == -1:
            continue
        sent_i = r[1] - 1
        # BUGFIX: track (user, sentence) pairs; the old code compared only
        # sentence indices, which could leak state across user boundaries.
        key = (user_i, sent_i)
        if key != last_key:
            # First event for this pair: initial focus time
            user_trans[user_i][sent_i][0][5] = str(r[2])
        else:
            # Overwrite with every following record (ends as the final time)
            user_trans[user_i][sent_i][-1][5] = str(r[2])
        last_key = key
    # Write user-specific reports (CSV)
    for (i, u) in enumerate(users):
        csv_out = codecs.open(os.path.join(r_dir, 'edits.' + u + '.csv'), mode='wb', encoding='UTF-8')
        print >>csv_out, u'\t'.join(('Time', 'Operation', 'Left', 'Edit', 'Right'))
        for sent_edits in user_trans[i]:
            for edit in sent_edits:
                print >>csv_out, u'\t'.join((edit[5], edit[4], edit[0], edit[1], edit[2]))
            # "empty" line separating sentences
            print >>csv_out, u'\t'.join(('', '', '', '', ''))
        csv_out.close()
    # Escape everything for the HTML reports
    for trans in user_trans:
        for sent in trans:
            for edit in sent:
                edit[0] = io_extra.html_escape(edit[0])
                edit[1] = io_extra.html_escape(edit[1])
                edit[2] = io_extra.html_escape(edit[2])
    c.close()
    conn.close()  # BUGFIX: only the cursor was closed; release the connection too
    # Write user-specific reports (HTML)
    for (i, u) in enumerate(users):
        # Write out report using template
        html_out = codecs.open(os.path.join(r_dir, 'edits.' + u + '.html'), mode='wb', encoding='UTF-8')
        html_out.write(unicode(render.report_edits(user_trans[i])))
        html_out.close()
    # Write user-specific pause reports (CSV only)
    def mark_pause(pause, p):
        # Classify pause length p into short/medium/long and accumulate total.
        # Hoisted out of the sentence loop (was redefined per iteration).
        if p >= PAUSE_SHORT:
            if p >= PAUSE_LONG:
                pause['l'] += 1
            elif p >= PAUSE_MEDIUM:
                pause['m'] += 1
            else:  # p >= PAUSE_SHORT
                pause['s'] += 1
            pause['t'] += p
    for (i, u) in enumerate(users):
        csv_out = codecs.open(os.path.join(r_dir, 'pause.' + u + '.csv'), mode='wb', encoding='UTF-8')
        print >>csv_out, u'\t'.join(('ID', 'Initial', 'Final', 'Short', 'Medium', 'Long', 'Total Time', 'Pause Time', 'Words', 'APR', 'PWR'))
        for (j, sent_edits) in enumerate(user_trans[i]):
            # Count pauses (initial, final, short, medium, long)
            fp = 0
            pause = {'s': 0, 'm': 0, 'l': 0, 't': 0}
            # Initial pause (0 when the sentence has no timed events)
            ip = 0
            try:
                ip = long(sent_edits[1][5]) - long(sent_edits[0][5])
            except (IndexError, ValueError):
                # Narrowed from a bare except: long() on a non-numeric/empty
                # time raises ValueError; a missing entry raises IndexError.
                io_extra.log(u'Warning: cannot compute initial pause, setting to 0 for ({}, {})'.format(u, j + 1))
            mark_pause(pause, ip)
            # If edited
            if len(sent_edits) > 2:
                for k in range(2, len(sent_edits) - 1):
                    mark_pause(pause, long(sent_edits[k][5]) - long(sent_edits[k - 1][5]))
                # Final pause
                fp = long(sent_edits[-1][5]) - long(sent_edits[-2][5])
                mark_pause(pause, fp)
            # Total time
            total = 0
            try:
                total = long(sent_edits[-1][5]) - long(sent_edits[0][5])
            except (IndexError, ValueError):
                io_extra.log(u'Warning: cannot compute total, setting to 0 for ({}, {})'.format(u, j + 1))
            # Words in the final translation
            words = len(sent_edits[-1][1].split())
            allp = pause['s'] + pause['m'] + pause['l']
            # Average pause ratio
            try:
                apr = (float(pause['t']) / allp) / (float(total) / words)
            except ZeroDivisionError:
                # No pauses, no words, or zero total time
                apr = 0
            # Pause-to-word ratio; BUGFIX: guard the words == 0 case, which
            # previously raised ZeroDivisionError on an empty translation.
            pwr = float(allp) / words if words else 0
            print >>csv_out, u'\t'.join(str(n) for n in (j + 1, ip, fp, pause['s'], pause['m'], pause['l'], total, pause['t'], words, apr, pwr))
        csv_out.close()
def reload_config():
    """Reload the global server configuration from config.txt."""
    global config
    cfg_path = os.path.join(io_extra.DATA_DIR, 'config.txt')
    config = io_extra.read_cfg(cfg_path)
def get_stats(t_dir):
    """Collect per-user statistics for the task stored in t_dir.

    Returns (config, header, col_avg, static_data, users, user_data):
      - header[col_i]: column headings
      - col_avg[col_i]: True when the column is averaged (or static) in the
        averaged report, False when it is dropped there
      - static_data: user-independent columns (IDs, source sentences)
      - user_data[user_i][type_i]: list of per-sentence values
    """
    header = []        # Column headings
    static_data = []   # Columns that are the same for all users
    users = []         # Users in order
    user_dict = {}     # user_dict[user] = user_i
    user_data = []     # user_data[user_i][type_i] = [val1, val2, ...]
    col_avg = []       # Does column average (or is static)?
                       # True: average or use static, False: drop in average report
    # Get the directory name / database entry for this task
    task_id = os.path.basename(os.path.abspath(t_dir))
    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']
    # Populate left/right plus headers (including next header for user input)
    if task in (track.REALTIME, track.REALTIME_STATIC, track.OFFLINE):
        header.append(H_SOURCE)
        col_avg.append(True)
        static_data.append(io_extra.read_utf8(os.path.join(t_dir, 'source.txt')))
        header.append(H_MT)
        col_avg.append(False)
        header.append(H_POST_EDITED)
        col_avg.append(False)
    n_sent = len(static_data[0])  # Source length
    # Pre-pend sentence IDs
    header.insert(0, H_ID)
    static_data.insert(0, range(1, n_sent + 1))
    col_avg.insert(0, True)
    # User Data
    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    c = conn.cursor()
    # Find users who finished this task
    for (i, (u,)) in enumerate(c.execute('''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC''')):
        users.append(u)
        user_data.append([])
        user_dict[u] = i
    # MT
    res = c.execute('''SELECT user, sent, text FROM mt ORDER BY user ASC, sent ASC, time ASC''')
    add_vals(user_dict, user_data, res, n_sent, mt=True, task=task)
    # Post-edited
    res = c.execute('''SELECT user, sent, text FROM translations ORDER BY user ASC, sent ASC, time ASC''')
    add_vals(user_dict, user_data, res, n_sent)
    # Compute HTER (user[0] = MT column, user[1] = post-edited column)
    header.append(H_HTER)
    # BUGFIX: no col_avg entry was appended for the HTER column, which
    # misaligned every averaging flag from here on (rating's flag landed on
    # HTER, and the last column had none). HTER is numeric, so average it.
    col_avg.append(True)
    for user in user_data:
        user.append(hter(user[0], user[1], norm=True))
    # Get user ratings
    header.append(H_RATING)
    col_avg.append(True)
    res = c.execute('''SELECT user, sent, rating FROM ratings ORDER BY user ASC, sent ASC, time ASC''')
    add_vals(user_dict, user_data, res, n_sent)
    # Key/mouse counts
    res = c.execute('''SELECT user, sent, op, count FROM counts ORDER BY user ASC, sent ASC, time ASC''')
    header.append(H_KEYPRESS)
    header.append(H_MOUSECLICK)
    col_avg.append(True)
    col_avg.append(True)
    add_km_sums(user_dict, user_data, res, n_sent)
    # User edit counts
    res = c.execute('''SELECT user, sent, caret, op, input FROM edits ORDER BY user ASC, sent ASC, time ASC''')
    header.append(H_EDITS)
    add_edits(user_dict, user_data, res, n_sent)
    col_avg.append(True)
    # Times from sentence focus/blur
    res = c.execute('''SELECT user, sent, op, time FROM events ORDER BY user ASC, sent ASC, time ASC''')
    header.append(H_TIME)
    add_times(user_dict, user_data, res, n_sent)
    col_avg.append(True)
    c.close()
    conn.close()  # BUGFIX: only the cursor was closed; release the connection too
    return (config, header, col_avg, static_data, users, user_data)