def handle(self, data):
    '''Queue one tracked editor event (and its edits) for the database writer.

    Reads the user and task from the session.  If either is missing, logs
    the attempt and returns an info page.  Otherwise builds a track event
    from data, appends it to ev_queue and its edits to ed_queue under
    queue_lock, and (when dbg_track is set) logs the running debug string
    for the user.  Returns None on the normal path.
    '''
    ip = web.ctx.get(u'ip')
    user = session.get(u'user')
    task = session.get(u'task')

    # Not logged in
    if not user:
        io_extra.log(u'SUBMIT: unauthorized access: {0}'.format(ip))
        return render.info(u'You must be logged in to access this page.  Please login using your account information.')

    # No task chosen
    if not task:
        io_extra.log(u'SUBMIT: no task selected: {0} {1}'.format(ip, user))
        return render.info(u'You must select a translation task.')

    ev = track.event(user, task, data)
    edits = ev.get_edits()

    # Sync add to event queue.  Release in finally so a failure while
    # queueing cannot leave the lock held.
    queue_lock.acquire()
    try:
        ev_queue.append(ev)
        for e in edits:
            ed_queue.append(e)
    finally:
        queue_lock.release()

    # Debug
    if dbg_track:
        dbg_lock.acquire()
        try:
            if ev.event == track.FOCUS:
                dbg_strings[ev.user] = ev.after
                io_extra.log(u'{0}: |{1}|'.format(ev.user, dbg_strings[ev.user]))
            if edits:
                for e in edits:
                    dbg_strings[ev.user] = track.apply_edit(dbg_strings[ev.user], e)
                    io_extra.log(u'{0}: |{1}| {2} {3}'.format(e.user, dbg_strings[ev.user], e.caret, e.data))
            else:
                io_extra.log(u'{0}'.format(ev))
        finally:
            # A missing dbg_strings entry (edit seen before any focus) must
            # not leave the debug lock held for every later request.
            dbg_lock.release()
def add_times(user_dict, data, res, n_sent):
    '''Append each user's per-sentence focused time to data.

    user_dict maps user -> row index; res yields rows of
    (user, sentence number (1-based), op, time).  Time between a FOCUS and
    the following BLUR on the same sentence is accumulated; a BLUR with no
    pending FOCUS is logged and skipped; other ops are ignored.  One list
    of n_sent totals is appended to data[user_i] for every user.
    '''
    n_users = len(user_dict)
    # totals[user_i][sent_i] = accumulated focused time
    totals = [[0] * n_sent for _ in range(n_users)]
    # focus_start[user_i][sent_i] = time of the pending focus, -1 if none
    focus_start = [[-1] * n_sent for _ in range(n_users)]

    for row in res:
        u = user_dict.get(row[0], -1)
        if u == -1:
            continue
        s = row[1] - 1
        kind = row[2]
        stamp = row[3]
        if kind == track.FOCUS:
            focus_start[u][s] = stamp
            continue
        if kind != track.BLUR:
            # Any other event does not count
            continue
        began = focus_start[u][s]
        if began == -1:
            io_extra.log(u'Warning: mismatched blur ignored: {}'.format(row))
            continue
        totals[u][s] += (stamp - began)
        focus_start[u][s] = -1

    for u, user_totals in enumerate(totals):
        data[u].append(user_totals)
def GET(self):
    '''Refuse direct GET access to the editor page.

    Logs whether the visitor was logged in or not, then always returns the
    same info page pointing them to the task list.
    '''
    ip = web.ctx.get(u'ip')
    logged_in = session.get(u'user')
    if logged_in:
        io_extra.log(u'EDITOR: direct access attempted: {0}'.format(ip))
    else:
        io_extra.log(u'EDITOR: unauthorized access: {0}'.format(ip))
    return render.info(u'This page must be accessed from the translation task list.  Please login and select a task.')
def GET(self):
    '''Log the current user out.

    If a user is in the session, the session is killed and a success page
    is returned; otherwise the attempt is logged and an info page returned.
    '''
    ip = web.ctx.get(u'ip')
    user = session.get(u'user')
    if user:
        # End user session
        session.kill()
        io_extra.log(u'LOGOUT: user logged out: {0} {1}'.format(ip, user))
        return render.info(u'You have successfully logged out.')
    io_extra.log(u'LOGOUT: unauthorized access: {0}'.format(ip))
    return render.info(u'You must be logged in to logout.  Although since you are not logged in, you are in fact logged out.')
def POST(self):
    '''Check the submitted admin password and open the admin page.

    On a match with config[u'admin_password'] the session is flagged as
    admin and the admin page is rendered; otherwise the attempt is logged
    and an info page is returned.
    '''
    ip = web.ctx.get(u'ip')
    form = web.input()
    supplied = form.get(u'adminpass')
    if supplied == config[u'admin_password']:
        # Logged in as admin
        session.admin = True
        return render.admin()
    io_extra.log(u'ADMIN: bad admin password: {0}'.format(ip))
    return render.info(u'Incorrect admin password.')
def manual_start(self, realtime_d):
    '''Start the translator for realtime_d unless one is already registered.

    Holds the per-translator lock from self.tr_locks while checking and
    creating the entry in self.tr.  Returns None.
    '''
    tr_lock = self.tr_locks[realtime_d]
    tr_lock.acquire()
    try:
        tr = self.tr.get(realtime_d, None)
        if not tr:
            io_extra.log(u'HUB: starting new translator: {}'.format(realtime_d))
            tr = rt.RealtimeTranslator(realtime_d, norm=True)
            self.tr[realtime_d] = tr
        else:
            io_extra.log(u'HUB: already active: {}'.format(realtime_d))
    finally:
        # Translator start-up can fail; never leave the lock held.
        tr_lock.release()
def GET(self):
    '''Render the task list page for the logged-in user.

    Visitors without a session user are logged and shown an info page.
    '''
    ip = web.ctx.get(u'ip')
    user = session.get(u'user')
    if user:
        # Logged in
        tasks = io_extra.list_tasks(escape=True)
        # List page uses user DB copy of status (faster, false positives aren't terrible)
        status = get_user_task_status(user, escape=True)
        return render.listpage(user, tasks, status)
    # Not logged in
    io_extra.log(u'LIST: unauthorized access: {0}'.format(ip))
    return render.info(u'You must be logged in to access this page.  Please login using your account information.')
def GET(self):
    '''Mark the session's current task as finished and return to the list.

    Updates (or inserts) the 'status' row in the user DB and in the task's
    own DB, logs the outcome, and renders the go-to-list page.  Visitors
    without a session user or task get an info page instead.
    '''
    ip = web.ctx.get(u'ip')
    user = session.get(u'user')
    task = session.get(u'task')

    # Not logged in
    if not user:
        io_extra.log(u'DONE: unauthorized access: {0}'.format(ip))
        return render.info(u'You must be logged in to access this page.  Please login using your account information.')

    # No task chosen
    if not task:
        io_extra.log(u'DONE: no task selected: {0} {1}'.format(ip, user))
        return render.info(u'You must select a translation task.')

    # Mark task completed in user DB
    res = list(user_db.select(u'status', dict(user=user, task=task), where=u'user = $user and task = $task'))
    if not res:
        user_db.insert(u'status', user=user, task=task, status=u'finished')
    else:
        user_db.update(u'status', vars=dict(user=user, task=task), where=u'user = $user and task = $task', status=u'finished')

    # Mark task in task DB
    # Rely on task DB for reporting status and possible errors
    db = trans_db[task]
    res = list(db.select(u'status', dict(user=user), where=u'user = $user'))
    # Task DB writes elsewhere in this file are serialized with
    # db_write_lock; do the same here.
    db_write_lock.acquire()
    try:
        if not res:
            db.insert(u'status', user=user, status=u'finished')
        else:
            db.update(u'status', vars=dict(user=user), where=u'user = $user', status=u'finished')
    finally:
        db_write_lock.release()

    if not res:
        io_extra.log(u'DONE: Error?  Task finished before started: {0} {1} {2}'.format(ip, user, task))
    else:
        io_extra.log(u'DONE: User finished task: {0} {1} {2}'.format(ip, user, task))

    return render.gotolist()
def POST(self):
    '''Open the editor for the task directory posted from the list page.

    Stores the task in the session, opens (and caches in trans_db) the
    task database if needed, loads the task object, records its realtime
    config in rtd, marks the task as 'started' in both the user DB and the
    task DB, and renders the editor.  Visitors without a session user or a
    posted task get an info page instead.
    '''
    ip = web.ctx.get(u'ip')
    user = session.get(u'user')
    data = web.input()
    taskdir = data.get(u'taskdir')

    # Not logged in
    if not user:
        io_extra.log(u'EDITOR: unauthorized access: {0}'.format(ip))
        return render.info(u'You must be logged in to access this page.  Please login using your account information.')

    # No task chosen
    if not taskdir:
        io_extra.log(u'EDITOR: no task selected: {0} {1}'.format(ip, user))
        return render.info(u'You must select a translation task.')

    # Set task in session
    session.task = taskdir

    # Load task data from task dir and database
    # NOTE(review): taskdir comes straight from the request and is joined
    # into a filesystem path below -- confirm it is validated against the
    # known task list somewhere.
    db = trans_db[taskdir]
    if not db:
        db = io_extra.get_trans_db(os.path.join(io_extra.TASK_DIR, taskdir, 'data.db'))
        db.printing = dbg_sql
        trans_db[taskdir] = db
    task = io_extra.task(user, taskdir, db)  # Loads everything into a task object

    # Config for realtime
    rtd[(user, taskdir)] = task.realtime_d

    # Update status in user DB
    res = list(user_db.select(u'status', dict(user=user, task=taskdir), where=u'user = $user and task = $task'))
    if not res:
        user_db.insert(u'status', user=user, task=taskdir, status=u'started')
    else:
        user_db.update(u'status', vars=dict(user=user, task=taskdir), where=u'user = $user and task = $task', status=u'started')

    # Update status in task DB
    res = list(db.select(u'status', dict(user=user), where=u'user = $user'))
    db_write_lock.acquire()
    try:
        if not res:
            db.insert(u'status', user=user, status=u'started')
        else:
            db.update(u'status', vars=dict(user=user), where=u'user = $user', status=u'started')
    finally:
        # A failed write must not leave the shared write lock held.
        db_write_lock.release()

    # Load editor
    return render.editor(task)
def POST(self):
    '''Handle the account-recovery form.

    Passes the posted e-mail to recover_account, logs its log message and
    shows its user message.  Any exception is logged with a traceback and
    a generic error page is shown.
    '''
    try:
        ip = web.ctx.get(u'ip')
        form = web.input()
        email = form.get(u'recemail')
        outcome = recover_account(ip, email)
        # First element is not used here; the rest is (log text, user text)
        log_msg, user_msg = outcome[1:]
        io_extra.log(log_msg)
        message = user_msg
    except Exception:
        io_extra.log(u'REC: exception:')
        io_extra.log(traceback.format_exc())
        message = u'Error encountered.  Please try again.'
    return render.info(message)
def POST(self):
    '''Handle the login form.

    Looks the posted user/password pair up in the user DB 'users' table.
    On a match the session user and group are set and the go-to-list page
    is rendered; otherwise an info page is returned.  Any exception is
    logged with a traceback and a generic error page is shown.
    '''
    try:
        # First check login information
        ip = web.ctx.get(u'ip')
        if u'HTTP_USER_AGENT' in web.ctx.env:
            ua = web.ctx.env[u'HTTP_USER_AGENT']
        else:
            ua = 'No user-agent'
        form = web.input()
        user = form.get(u'uid')
        password = form.get(u'pass')
        matches = list(user_db.select(u'users', dict(user=user, password=password), where=u'user = $user and password = $password'))

        if matches:
            # Login success: set session
            session.user = user
            session.group = matches[0]['groupname']
            io_extra.log(u'LOGIN: user logged in: {0} {1} {2}'.format(ip, user, ua))
            # Redirect to list now that user is logged in
            return render.gotolist()

        io_extra.log(u'LOGIN: bad account: {0} {1} {2}'.format(ip, user, ua))
        return render.info(u'Login information does not match any account.  Please try again.')
    except Exception:
        io_extra.log(u'LOGIN: exception:')
        io_extra.log(traceback.format_exc())
        return render.info(u'Error encountered.  Please try again')
def handle(self, data):
    '''Return the machine translation for the next sentence of the task.

    data carries the request fields: 'i' (next sentence id), 's' (source),
    'r' (reference), 'n' (next sentence), 't' (task type) and 'f' (first
    sentence flag, the string 'true').  Empty strings for source,
    reference and next are treated as None.

    Returns an info page when the session has no user or task, a fixed
    test string when dbg_editor is set, the stored translation (or None)
    for offline tasks, and otherwise whatever hub.interact returns.
    '''
    global dbg_editor, hub, off_hub
    ip = web.ctx.get(u'ip')
    user = session.get(u'user')
    task = session.get(u'task')

    # Not logged in
    if not user:
        io_extra.log(u'SUBMIT: unauthorized access: {0}'.format(ip))
        return render.info(u'You must be logged in to access this page.  Please login using your account information.')

    # No task chosen
    if not task:
        io_extra.log(u'SUBMIT: no task selected: {0} {1}'.format(ip, user))
        return render.info(u'You must select a translation task.')

    # Look the realtime config up only after the guards: with no user or
    # task there is no (user, task) entry to find.
    realtime_d = rtd[(user, task)]

    # Translation data
    next_id = data[u'i']
    source = data[u's']
    if source == '':
        source = None
    reference = data[u'r']
    if reference == '':
        reference = None
    next_sent = data[u'n']
    if next_sent == '':
        next_sent = None
    t = data[u't']
    static = (t == track.REALTIME_STATIC)
    # First sentence for a non-static translator? (don't (re)learn)
    first = ((data[u'f'] == 'true') and not static)

    # Editor debugging, don't call hub
    if dbg_editor:
        io_extra.log(u'DBG-HUB: ({}-{}): {} ||| {} ||| {}'.format(user, task, source, reference, next_sent))
        return 'test string'

    # Offline translation
    if task in off_hub:
        if next_sent:
            translation = off_hub[task][next_sent]
            trans_db[task].insert(u'mt', time=int(time.time() * 1000), user=user, sent=next_id, text=translation)
            return translation
        return None

    # All static tasks use the same instance
    return hub.interact('STATIC' if static else user, task, realtime_d, source, reference, next_sent, next_id, static, first)
def POST(self):
    '''Handle the registration form.

    Reloads the groups, passes the posted user id, group code, password
    and e-mail to add_user, logs its log message and shows its user
    message.  Any exception is logged with a traceback and a generic error
    page is shown.
    '''
    try:
        ip = web.ctx.get(u'ip')
        form = web.input()
        user = form.get(u'newuid')
        code = form.get(u'newgroup')
        password = form.get(u'newpass')
        email = form.get(u'newemail')
        reload_groups()  # Make sure groups are up to date
        # Attempt to add user
        outcome = add_user(ip, user, code, password, email)
        log_msg, user_msg = outcome[1:]
        io_extra.log(log_msg)
        message = user_msg
    except Exception:
        io_extra.log(u'REG: exception:')
        io_extra.log(traceback.format_exc())
        message = u'Error encountered.  Please try again.'
    # Notify user
    return render.info(message)
def keep_clean(self):
    '''Cleaner loop: once a second, drop contexts idle past the timeout.

    Runs until self.next_clean is 0.  When the current time reaches
    self.next_clean, every id in self.ids whose last activity is at least
    self.timeout seconds old is dropped and its translator's context count
    is decremented; next_clean is then pushed forward by self.clean_freq.
    Errors for a single id are logged and do not stop the loop.
    '''
    while True:
        time.sleep(1)
        self.cleaner_lock.acquire()
        try:
            # Done
            if self.next_clean == 0:
                break
            now = int(time.time())
            if now >= self.next_clean:
                for ctx_id in list(self.ids):
                    id_lock = self.id_locks[ctx_id]
                    id_lock.acquire()
                    try:
                        # Drop context
                        if now - self.last[ctx_id] >= self.timeout:
                            io_extra.log(u'HUB: id timeout: {}'.format(ctx_id))
                            self.drop(ctx_id)
                            # Check if translator closes
                            rtd = self.id_rtd.pop(ctx_id)
                            tr_lock = self.tr_locks[rtd]
                            tr_lock.acquire()
                            try:
                                self.tr_nctx[rtd] -= 1
                                # Close if 0 active contexts
                                # Currently disabled due to segfault in grammar extractor in Cython-generated code after restart
                                # TODO: investigate what's causing this so we can free idle translators
                                if False and self.tr_nctx[rtd] == 0:
                                    io_extra.log(u'HUB: closing translator: {}'.format(rtd))
                                    tr = self.tr.pop(rtd)
                                    tr.close()
                                    self.tr_locks.pop(rtd)
                                    self.tr_nctx.pop(rtd)
                            finally:
                                tr_lock.release()
                    except Exception:
                        io_extra.log(u'HUB: id error (likely forced restart): {}'.format(ctx_id))
                    finally:
                        id_lock.release()
                self.next_clean = int(time.time()) + self.clean_freq
        finally:
            # Also runs on the break above, so the cleaner never exits
            # while still holding its lock.
            self.cleaner_lock.release()
def interact(self, user, task, realtime_d, source, reference, next, next_id, static=False, first=False):
    '''In order:
    - learn from source-reference
    - save state
    - translate next
    First sentence: source and reference == None
    Last sentence: next == None
    Each user-task pair should map to ONE realtime_d
    Each realtime_d can serve MANY user-task pairs across users and tasks

    Returns the translation of next as unicode, or None when next is None.
    Learning and state saving are skipped when static is true or when
    source or reference is None.  All locks taken here are released even
    if the translator or the database raises; the exception propagates.
    '''
    # Id is user-task.  One translator can have multiple entries for same user or task, but user-task is unique.
    ctx_id = u'{}-{}'.format(user, task)
    id_lock = self.id_locks[ctx_id]
    id_lock.acquire()
    try:
        # Get rtd, translator by id
        rtd = self.id_rtd.get(ctx_id, None)
        if not rtd:
            rtd = realtime_d
            self.id_rtd[ctx_id] = rtd

        tr_lock = self.tr_locks[rtd]
        tr_lock.acquire()
        try:
            tr = self.tr.get(rtd, None)
            # Start translator if needed
            if not tr:
                io_extra.log(u'HUB: starting new translator: {}'.format(rtd))
                tr = rt.RealtimeTranslator(rtd, norm=True)
                self.tr[rtd] = tr
            # Force context restart if first sentence requested (editor re-start)
            if first and ctx_id in self.ids:
                io_extra.log(u'HUB: editor restart detected, forcing restart: {}'.format(ctx_id))
                self.drop(ctx_id, keep_lock=True)
                self.tr_nctx[rtd] -= 1
            # Start context if needed
            new_context = ctx_id not in self.ids
            if new_context:
                self.ids.add(ctx_id)
                # Count as context for realtime_d
                self.tr_nctx[rtd] += 1
        finally:
            # Released only after a possible first context has been counted,
            # to avoid the cleaner removing the translator immediately (0 contexts)
            tr_lock.release()

        if new_context:
            # Load state if present
            db = self.db[task]
            self.db_lock.acquire()
            try:
                res = list(db.select(u'state', dict(user=user), where=u'user = $user', order=u'time DESC'))
            finally:
                self.db_lock.release()
            if len(res) > 0:
                io_extra.log(u'HUB: starting context and loading state: {} ({}) {}'.format(rtd, self.tr_nctx[rtd], ctx_id))
                # convert to utf-8 for Realtime
                sio = StringIO.StringIO(res[0]['state'].encode('utf-8'))
                tr.load_state(sio, ctx_id.encode('utf-8'))
            else:
                io_extra.log(u'HUB: starting new context: {} ({}) {}'.format(rtd, self.tr_nctx[rtd], ctx_id))

        io_extra.log(u'HUB: interact ({}): {} ||| {} ||| {}'.format(ctx_id, source, reference, next))

        # Learn and update state in DB unless first sentence or static
        if not static and None not in (source, reference):
            # Learn
            # Give Realtime utf-8-encoded text
            tr.learn(source.encode('utf-8'), reference.encode('utf-8'), ctx_id.encode('utf-8'))
            # Save state at time
            sio = StringIO.StringIO()
            tr.save_state(sio, ctx_id.encode('utf-8'))
            db = self.db[task]
            self.db_lock.acquire()
            try:
                # time in milliseconds; state stored as unicode
                db.insert(u'state', time=int(time.time() * 1000), user=user, state=sio.getvalue().decode('utf-8'))
            finally:
                self.db_lock.release()

        # Translate, except last sentence
        hyp = None
        if next is not None:
            # Translate
            # Give Realtime utf-8-encoded text
            hyp = tr.translate(next.encode('utf-8'), ctx_id.encode('utf-8')).decode('utf-8')
            db = self.db[task]
            self.db_lock.acquire()
            try:
                # time in milliseconds; hyp is already unicode
                db.insert(u'mt', time=int(time.time() * 1000), user=user, sent=next_id, text=hyp)
            finally:
                self.db_lock.release()

        # last before release/return
        self.last[ctx_id] = int(time.time())
        return hyp
    finally:
        # Without this, one failed request would leave the id locked for
        # every later request and for the cleaner.
        id_lock.release()
def main(argv):
    '''Run the TransCenter live server.

    Parses the command line (-p port, -t timeout, --dbg-* flags), starts
    the realtime hub (skipped with --dbg-editor), starts translators for
    realtime tasks and loads source/target pairs for offline tasks found
    under io_extra.TASK_DIR, starts the database writer thread, runs the
    web app, and cleans up once the app returns.

    argv is the full argument vector including the program name.  With
    -h/--help, usage is printed to stderr and the process exits with 2.
    '''
    global dbg_editor, dbg_sql, dbg_track, hub, off_hub

    # Unicode I/O
    sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
    sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
    sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)

    if len(argv[1:]) > 0 and argv[1] in ('-h', '--help'):
        print >> sys.stderr, u'TransCenter Live Server'
        print >> sys.stderr, u'Usage: {0} [-p port=8080] [-t timeout=600] [--dbg-editor] [--dbg-sql] [--dbg-track] [--dbg-webpy]'.format(argv[0])
        print >> sys.stderr, u'(crtl+c to stop server)'
        sys.exit(2)

    opts, argv = getopt.getopt(argv[1:], 'p:t:', ['dbg-editor', 'dbg-sql', 'dbg-track', 'dbg-webpy'])
    # Only the program name (plus the port, appended below) is left in
    # sys.argv for the web app.
    sys.argv = [sys.argv[0]]
    timeout = 600
    for o, a in opts:
        if o == '-p':
            sys.argv.append(a)
        elif o == '-t':
            timeout = int(a)
        elif o == '--dbg-editor':
            dbg_editor = True
        elif o == '--dbg-sql':
            dbg_sql = True
            user_db.printing = dbg_sql
        elif o == '--dbg-track':
            dbg_track = True
        elif o == '--dbg-webpy':
            web.config['debug'] = True

    # Realtime hub
    io_extra.log(u'STATUS: starting Realtime hub...')
    hub = rt_hub.RealtimeHub(trans_db, db_write_lock, timeout=timeout) if not dbg_editor else None
    # [task][source] = target
    off_hub = {}
    # Init translators
    # TODO: change to on-demand/timeout
    for task in sorted((f for f in os.listdir(io_extra.TASK_DIR) if f.endswith('.data'))):
        config = io_extra.read_cfg(os.path.join(io_extra.TASK_DIR, task, 'config.txt'))
        if config['task'] in (track.REALTIME, track.REALTIME_STATIC):
            # No hub exists in editor-debug mode, so there is nothing to start.
            if hub is not None:
                realtime_d = config['config']
                hub.manual_start(realtime_d)
        # Load offline data
        elif config['task'] == 'offline':
            src_path = os.path.join(io_extra.TASK_DIR, task, 'source.txt')
            tgt_path = os.path.join(io_extra.TASK_DIR, task, 'target.txt')
            with codecs.open(src_path, 'rb', 'UTF-8') as src:
                with codecs.open(tgt_path, 'rb', 'UTF-8') as tgt:
                    off_hub[task] = dict((s.strip(), t.strip()) for (s, t) in itertools.izip(src, tgt))
    io_extra.log(u'STATUS: Realtime hub started.')

    # Database writer
    writer = threading.Thread(target=io_extra.run_database_writer, args=(trans_db, db_write_lock, ev_queue, ed_queue, queue_lock))
    writer.start()
    io_extra.log(u'STATUS: Database writing started.')

    # Start web app
    io_extra.log(u'STATUS: Main webpy app starting.')
    app.run()

    # Cleanup
    # None is appended to the event queue before joining the writer,
    # presumably as its stop signal -- confirm in run_database_writer.
    ev_queue.append(None)
    if hub is not None:
        hub.close()
        io_extra.log(u'STATUS: Realtime hub closed.')
    writer.join()
    io_extra.log(u'STATUS: All database writes finished.')
    session_store.cleanup(0)
    io_extra.log(u'STATUS: Ready to shutdown.')
def send_account(*args):
    '''Send an account e-mail and log that it was sent.

    All positional arguments are forwarded unchanged to web.sendmail.
    '''
    web.sendmail(*args)
    # NOTE(review): `email` is not bound inside this function; it has to
    # come from an enclosing or module scope -- confirm it names the
    # recipient address.
    sent_note = u'REC: Account info sent: {0}'.format(email)
    io_extra.log(sent_note)
def report_edits_pauses(t_dir, r_dir):
    '''Write per-user edit traces and pause statistics for one task.

    Reads config.txt and data.db from the task directory t_dir.  For every
    user whose status is 'finished' it replays the recorded edits on top
    of the machine translation of each sentence and writes, into r_dir:
      edits.<user>.csv  - tab-separated trace (time, operation, left, edit, right)
      edits.<user>.html - the same trace rendered with the report_edits template
      pause.<user>.csv  - per-sentence pause counts, times and ratios

    Sentence numbers in the database are taken to be 1-based and
    contiguous, since they are used as list positions.
    '''
    users = []  # Users in order
    user_dict = {}  # user_dict[user] = user_i
    user_trans = []  # user_trans[user_i][sent_i] = [inter1, inter2]
    # where intermediate is [left, diff, right, op, opclass, time]
    base_trans = collections.defaultdict(dict)  # base_trans[user][sent] = translation before editing

    def mark_pause(pause, p):
        # Count p in its length class and add it to the total pause time,
        # but only if it is long enough to be a pause at all
        if p >= PAUSE_SHORT:
            if p >= PAUSE_LONG:
                pause['l'] += 1
            elif p >= PAUSE_MEDIUM:
                pause['m'] += 1
            else:
                pause['s'] += 1
            pause['t'] += p

    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']

    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    try:
        c = conn.cursor()

        # Find users who finished this task
        for (u,) in c.execute('''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC'''):
            user_dict[u] = len(users)
            users.append(u)

        # Populate base translations (mt) for each user
        for user in users:
            mt_user = 'STATIC' if task == track.REALTIME_STATIC else user
            for (sent, text) in c.execute('''SELECT sent, text FROM mt WHERE user=? ORDER BY sent ASC''', (mt_user,)):
                base_trans[user][sent] = text
            # Each user starts with a list of intermediates, starting with the base translation
            user_trans.append([[['', t, '', -1, 'start', '']] for (_, t) in sorted(base_trans[user].iteritems())])

        # Read user edits
        res = c.execute('''SELECT user, sent, caret, op, input, time FROM edits ORDER BY user ASC, sent ASC, time ASC''')
        # Trace edits, annotate with types, times
        for r in res:
            user_i = user_dict.get(r[0], -1)
            if user_i == -1:
                continue
            sent_i = r[1] - 1
            caret = r[2]
            op = r[3]
            diff = r[4]
            stamp = r[5]
            last = user_trans[user_i][sent_i][-1]
            if last[3] in (-1, track.INS):
                prev = u''.join(last[:3])
            elif last[3] == track.DEL:
                prev = u''.join((last[0], last[2]))
            else:
                io_extra.log('Unknown edit op, using emptry string')
                prev = u''
            left = prev[:caret]
            # For deletes, diff overlaps prev, so cut out
            right = prev[caret + len(diff):] if op == track.DEL else prev[caret:]
            if op == track.INS:
                opclass = u'ins'
            elif op == track.DEL:
                opclass = u'del'
            else:
                # Only count inserts and deletes
                continue
            user_trans[user_i][sent_i].append([left, diff, right, op, opclass, unicode(stamp)])

        # Final outputs
        for trans in user_trans:
            for sent in trans:
                op = sent[-1][3]
                if op == -1:
                    prev = sent[-1][1]
                elif op == track.INS:
                    prev = u''.join(sent[-1][:3])
                else:
                    prev = u''.join((sent[-1][0], sent[-1][2]))
                sent.append(['', prev, '', -1, 'end', ''])

        # Pull initial and final focus/blur times
        res = c.execute('''SELECT user, sent, time FROM events ORDER BY user ASC, sent ASC, time ASC''')
        # Track the (user, sentence) pair, not just the sentence, so the
        # first event of a new user on the same sentence number still
        # counts as an initial focus.
        last_key = None
        for r in res:
            user_i = user_dict.get(r[0], -1)
            if user_i == -1:
                continue
            sent_i = r[1] - 1
            key = (user_i, sent_i)
            if key != last_key:
                # Initial focus
                user_trans[user_i][sent_i][0][5] = str(r[2])
            else:
                # overwrite with every following record
                user_trans[user_i][sent_i][-1][5] = str(r[2])
            last_key = key

        c.close()
    finally:
        # All database reads are done (or failed); release the handle.
        conn.close()

    # Write user-specific reports (CSV)
    for (i, u) in enumerate(users):
        with codecs.open(os.path.join(r_dir, 'edits.' + u + '.csv'), mode='wb', encoding='UTF-8') as csv_out:
            print >>csv_out, u'\t'.join(('Time', 'Operation', 'Left', 'Edit', 'Right'))
            for sent_edits in user_trans[i]:
                for edit in sent_edits:
                    print >>csv_out, u'\t'.join((edit[5], edit[4], edit[0], edit[1], edit[2]))
                # "empty" line
                print >>csv_out, u'\t'.join(('', '', '', '', ''))

    # Escape everything
    for trans in user_trans:
        for sent in trans:
            for edit in sent:
                edit[0] = io_extra.html_escape(edit[0])
                edit[1] = io_extra.html_escape(edit[1])
                edit[2] = io_extra.html_escape(edit[2])

    # Write user-specific reports (HTML)
    for (i, u) in enumerate(users):
        # Write out report using template
        with codecs.open(os.path.join(r_dir, 'edits.' + u + '.html'), mode='wb', encoding='UTF-8') as html_out:
            html_out.write(unicode(render.report_edits(user_trans[i])))

    # Write user-specific pause reports (CSV only)
    for (i, u) in enumerate(users):
        with codecs.open(os.path.join(r_dir, 'pause.' + u + '.csv'), mode='wb', encoding='UTF-8') as csv_out:
            print >>csv_out, u'\t'.join(('ID', 'Initial', 'Final', 'Short', 'Medium', 'Long', 'Total Time', 'Pause Time', 'Words', 'APR', 'PWR'))
            for (j, sent_edits) in enumerate(user_trans[i]):
                # Count pauses (initial, final, short, medium, long)
                pause = {'s': 0, 'm': 0, 'l': 0, 't': 0}
                fp = 0

                # Initial pause
                ip = 0
                try:
                    ip = long(sent_edits[1][5]) - long(sent_edits[0][5])
                except Exception:
                    io_extra.log(u'Warning: cannot compute initial pause, setting to 0 for ({}, {})'.format(u, j + 1))
                mark_pause(pause, ip)

                # If edited
                if len(sent_edits) > 2:
                    for k in range(2, len(sent_edits) - 1):
                        p = long(sent_edits[k][5]) - long(sent_edits[k - 1][5])
                        mark_pause(pause, p)
                    # Final pause
                    fp = long(sent_edits[-1][5]) - long(sent_edits[-2][5])
                    mark_pause(pause, fp)

                # Total time
                total = 0
                try:
                    total = long(sent_edits[-1][5]) - long(sent_edits[0][5])
                except Exception:
                    io_extra.log(u'Warning: cannot compute total, setting to 0 for ({}, {})'.format(u, j + 1))

                # Words (counted on the escaped final text)
                words = len(sent_edits[-1][1].split())

                # Average pause ratio
                allp = pause['s'] + pause['m'] + pause['l']
                try:
                    apr = (float(pause['t']) / allp) / (float(total) / words)
                except ZeroDivisionError:
                    # No pauses, no words or no time
                    apr = 0

                # Pause to word ratio; 0 when the final text has no words
                pwr = float(allp) / words if words else 0

                print >>csv_out, u'\t'.join(str(n) for n in (j + 1, ip, fp, pause['s'], pause['m'], pause['l'], total, pause['t'], words, apr, pwr))