def enable_label_for_sub_task(subTaskId, labelId):
    '''
    Re-enables a label for a sub task by deleting the matching
    ShadowedLabel record, if there is one.

    The same confirmation message is returned whether or not a record
    had to be deleted.
    '''
    shadow = m.ShadowedLabel.query.get((subTaskId, labelId))
    if shadow:
        SS.delete(shadow)
    text = _('label {0} has been enabled for sub task {1}')
    return jsonify(message=text.format(labelId, subTaskId))
def create_sub_task_rate_record(subTaskId):
    '''
    creates a new rate record for a sub task

    Reads `rateId` and `multiplier` (both mandatory) plus an optional
    `bonus` from the submitted form and stores a SubTaskRate marked as
    updated by the current session user.

    Raises InvalidUsage (404) if the sub task does not exist.
    '''
    subTask = m.SubTask.query.get(subTaskId)
    if not subTask:
        # report a missing resource as 404
        raise InvalidUsage(
            _('sub task {0} not found').format(subTaskId), 404)

    data = MyForm(
        Field('rateId', is_mandatory=True, validators=[]),
        Field('multiplier', is_mandatory=True,
            normalizer=lambda data, key, value: float(value),
            validators=[
                (validators.is_number, (), dict(min_value=0)),
            ]),
        # NOTE(review): `ge` differs from the `min_value` keyword used for
        # `multiplier` -- confirm validators.is_number accepts both.
        Field('bonus', default=None, validators=[
            (validators.is_number, (), dict(ge=0)),
        ]),
    ).get_data()

    me = session['current_user']

    # NOTE(review): only taskId is passed to the record, subTaskId is not
    # -- confirm this is intended.
    subTaskRate = m.SubTaskRate(taskId=subTask.taskId,
        updatedBy=me.userId, **data)
    SS.add(subTaskRate)
    SS.flush()
    return jsonify({
        'message': _('created sub task rate {0} successfully').format(
            subTaskRate.subTaskRateId),
        'subTaskRate': m.SubTaskRate.dump(subTaskRate),
    })
def main(taskId=None):
    '''
    Ends the current work intervals of one task, or, when `taskId` is
    None, of every task that is not archived, closed or finished.

    Each task is committed on its own. The first task that fails is
    logged with its traceback, rolled back, and stops the run.

    Raises ValueError if `taskId` is given but no such task exists.
    '''
    logging.basicConfig(level=logging.DEBUG)
    if taskId is None:
        tasks = m.Task.query.filter(
            m.Task.status.notin_([
                m.Task.STATUS_ARCHIVED,
                m.Task.STATUS_CLOSED,
                m.Task.STATUS_FINISHED,
            ])).all()
    else:
        task = m.Task.query.get(taskId)
        if not task:
            raise ValueError('task {0} not found'.format(taskId))
        tasks = [task]

    for task in tasks:
        try:
            end_work_intervals(task)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit still stop the script immediately.
            log.error(traceback.format_exc())
            SS.rollback()
            break
        else:
            log.info('task {} succeeded'.format(task.taskId))
            SS.commit()
def create_error_type():
    '''
    creates a new error type
    '''
    def to_float(data, key, value):
        # normalizer: the severity is stored as a float
        return float(value)

    name_field = Field('name', is_mandatory=True, validators=[
        validators.non_blank,
        check_name_uniqueness,
    ])
    class_field = Field('errorClassId', is_mandatory=True, validators=[
        check_error_class_existence,
    ])
    severity_field = Field('defaultSeverity', is_mandatory=True,
        normalizer=to_float,
        validators=[
            (validators.is_number, (), dict(max_value=1, min_value=0)),
        ])

    fields = MyForm(name_field, class_field, severity_field).get_data()

    errorType = m.ErrorType(**fields)
    SS.add(errorType)
    SS.flush()

    message = _('created error type {0} successfully').format(
        errorType.name)
    return jsonify({
        'message': message,
        'errorType': m.ErrorType.dump(errorType),
    })
def filter_user(task, workOption, userId):
    '''
    Returns the set of rawPieceIds of `task` whose work entries match
    `userId` under the given work option:

    - MyFilter.ANY: any work entry made by the user
    - MyFilter.FIRST: one entry per raw piece, ordered by creation time
      ascending, must be by the user
    - MyFilter.MOST_RECENT: one entry per raw piece, ordered by creation
      time descending, must be by the user
    - MyFilter.MOST_RECENT_MODIFIED: as MOST_RECENT, but only entries
      with modifiesTranscription set are considered

    Raises ValueError if `userId` is not an integer or `workOption` is
    not one of the options above.
    '''
    try:
        userId = int(userId)
    except (TypeError, ValueError):
        # only conversion failures mean "invalid id"
        raise ValueError(_('invalid user id: {}').format(userId))

    # TODO: check user is working on this task?

    # distinct(rawPieceId) keeps a single row per raw piece (presumably
    # rendered as DISTINCT ON); the order_by decides which one.
    inner = SS.query(
        m.WorkEntry.rawPieceId.label('rawPieceId'),
        m.WorkEntry.userId.label('userId')).distinct(
        m.WorkEntry.rawPieceId).filter(m.WorkEntry.taskId == task.taskId)

    if workOption == MyFilter.ANY:
        inner = inner.filter(m.WorkEntry.userId == userId)
    elif workOption == MyFilter.FIRST:
        inner = inner.order_by(m.WorkEntry.rawPieceId, m.WorkEntry.created)
    elif workOption == MyFilter.MOST_RECENT:
        inner = inner.order_by(m.WorkEntry.rawPieceId,
            m.WorkEntry.created.desc())
    elif workOption == MyFilter.MOST_RECENT_MODIFIED:
        inner = inner.filter(
            m.WorkEntry.modifiesTranscription.is_(True)).order_by(
            m.WorkEntry.rawPieceId, m.WorkEntry.created.desc())
    else:
        raise ValueError(_('invalid work option: {}').format(workOption))

    sub_q = inner.subquery('sub_q')
    q = SS.query(sub_q.c.rawPieceId).distinct(
        sub_q.c.rawPieceId).filter(sub_q.c.userId == userId)
    return set(r.rawPieceId for r in q.all())
def get_qa_samples(self, subTask, userId, entryIds):
    '''
    Randomly picks work entries out of `entryIds` that should be QA-ed.

    The required sample size comes from self.get_sample_set_size() using
    the sub task's QA configuration. Entries of `entryIds` that already
    have QA planned or have already been QA-ed count towards that size
    and are never picked again.

    `entryIds` must be a set. `userId` is not used here.
    Returns a list of entry ids, empty when no more samples are needed.
    '''
    config = subTask.qaConfig
    samples_needed = self.get_sample_set_size(
        len(entryIds),
        config.samplingError,
        config.defaultExpectedAccuracy,
        config.confidenceInterval)

    # entries for which QA has been planned
    planned_query = SS.query(m.PageMember.workEntryId).filter(
        m.PageMember.taskId == subTask.taskId,
        m.PageMember.workType == m.WorkType.QA,
    ).distinct(m.PageMember.workEntryId)

    # entries that have been QA-ed already
    qaed_query = SS.query(m.WorkEntry.qaedEntryId).filter(
        m.WorkEntry.taskId == subTask.taskId,
    ).distinct(m.WorkEntry.qaedEntryId)

    planned = set(row.workEntryId for row in planned_query.all()) & entryIds
    qaed = set(row.qaedEntryId for row in qaed_query.all()) & entryIds

    shortfall = samples_needed - len(qaed) - len(planned)
    if shortfall <= 0:
        return []

    candidates = list(entryIds - planned - qaed)
    random.shuffle(candidates)
    return candidates[:shortfall]
def filter_qa_error_type(task, errorTypeId):
    '''
    Returns the set of rawPieceIds of `task` whose QA entry (one per
    qaedEntryId, ordered by creation time descending) has an applied
    error of type `errorTypeId`.

    Returns an empty set when there is no TaskErrorType record for
    (task.taskId, errorTypeId).
    Raises ValueError if `errorTypeId` is not an integer.
    '''
    try:
        # was int(erorrTypeId): the NameError was swallowed by a bare
        # except, so every call was rejected as an invalid id
        errorTypeId = int(errorTypeId)
    except (TypeError, ValueError):
        raise ValueError(
            _('invalid error type id: {}').format(errorTypeId))

    taskErrorType = m.TaskErrorType.query.get((task.taskId, errorTypeId))
    if not taskErrorType:
        return set()

    # latest QA result
    inner = SS.query(
        m.WorkEntry.entryId,
        m.WorkEntry.qaedEntryId,
        m.WorkEntry.rawPieceId).distinct(m.WorkEntry.qaedEntryId).filter(
        m.WorkEntry.taskId == task.taskId).filter(
        m.WorkEntry.workType == m.WorkType.QA).order_by(
        m.WorkEntry.qaedEntryId, m.WorkEntry.created.desc())
    sub_q = inner.subquery('sub_q')

    q = SS.query(sub_q.c.rawPieceId).distinct(sub_q.c.rawPieceId).join(
        m.AppliedError, m.AppliedError.entryId == sub_q.c.entryId).filter(
        m.AppliedError.errorTypeId == errorTypeId)
    return set(r.rawPieceId for r in q.all())
def assign_batch_to_user(batchId, userId):
    '''
    Assigns a batch to a user and starts its lease.

    The lease starts now (UTC) and runs for the default lease life of
    the batch's sub task. When policy.active_worker_only is set, the
    user must have a non-removed TaskWorker record for that sub task.

    Raises InvalidUsage (404) if the batch or the user does not exist,
    and InvalidUsage if the active-worker check fails.
    '''
    batch = m.Batch.query.get(batchId)
    if not batch:
        raise InvalidUsage(_('batch {0} not found').format(batchId), 404)

    user = m.User.query.get(userId)
    if not user:
        raise InvalidUsage(_('user {0} not found').format(userId), 404)

    # TODO: perform more checks according to policy
    if policy.active_worker_only:
        active_records = m.TaskWorker.query.filter_by(
            taskId=batch.taskId,
            subTaskId=batch.subTaskId,
            userId=userId,
            removed=False,
        ).count()
        if active_records == 0:
            raise InvalidUsage(
                _('user {0} is not working on sub task {1}').format(
                    userId, batch.subTaskId))

    # TODO: change time from naive to timezone aware
    granted = datetime.utcnow().replace(tzinfo=pytz.utc)
    batch.leaseGranted = granted
    batch.leaseExpires = granted + batch.subTask.defaultLeaseLife
    batch.user = user
    SS.flush()

    message = _(
        'batch {0} has been assigned to user {1}, expires at {2}').format(
        batchId, user.userName, batch.leaseExpires)
    return jsonify({
        'message': message,
        'batch': m.Batch.dump(batch),
    })
def api(fn): @wraps(fn) def decorated(*args, **kwargs): ''' api handlers generally return responses with mimetype set to json, this can be changed by returning a response instead (e.g. froma file download handler). ''' try: result = fn(*args, **kwargs) if isinstance(result, dict): resp = jsonify(result) elif isinstance(result, Response): resp = result else: raise RuntimeError, 'unexpected datatype returned from api handler' except InvalidUsage, e: resp = make_response(jsonify(e.to_dict()), e.status_code, {}) SS.rollback() except HTTPException, e: # # Normally we should not end up being here because all api # handlers are suppose to raise InvalidUsage and such Exceptions # should be caught by api version blueprint's error handler. # In case there are non-compliant handlers that are still using # using HTTPException directly, we explicitly convert it to a # JSON response. # resp = make_response(jsonify({'error': '%s' % e}), e.code, {}) SS.rollback()
def create_alphabet():
    '''
    creates a new alphabet

    `name` and `dialectId` are mandatory; `description` and `url` are
    optional, with `url` defaulting to None.
    '''
    name_field = Field('name', is_mandatory=True, validators=[
        validators.non_blank,
        check_name_uniqueness,
    ])
    dialect_field = Field('dialectId', is_mandatory=True, validators=[
        validators.is_number,
        check_dialect_existence,
    ])
    url_field = Field('url', default=lambda: None)

    fields = MyForm(
        name_field,
        Field('description'),
        dialect_field,
        url_field,
    ).get_data()

    alphabet = m.Alphabet(**fields)
    SS.add(alphabet)
    SS.flush()

    message = _('created alphabet {0} successfully').format(alphabet.name)
    return jsonify({
        'message': message,
        'alphabet': m.Alphabet.dump(alphabet),
    })
def end_work_intervals(task=None):
    '''
    Closes every work interval with STATUS_CURRENT (only those of `task`
    when one is given) and opens a successor for each.

    A closed interval ends at 23:59:59.999999 UTC of the current UTC day
    and moves to STATUS_ADDING_FINAL_CHECKS. Its successor starts at that
    same instant with no end time. Nothing is flushed or committed here.
    '''
    current = m.WorkInterval.query.filter(
        m.WorkInterval.status == m.WorkInterval.STATUS_CURRENT)
    if task is not None:
        current = current.filter(m.WorkInterval.taskId == task.taskId)

    utc_now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
    endTime = utc_now.replace(
        hour=23, minute=59, second=59, microsecond=999999)

    for interval in current.all():
        interval.endTime = endTime
        interval.status = interval.STATUS_ADDING_FINAL_CHECKS
        successor = m.WorkInterval(
            taskId=interval.taskId,
            subTaskId=interval.subTaskId,
            status=interval.STATUS_CURRENT,
            startTime=endTime,
            endTime=None,
        )
        SS.add(successor)
def create_label_group(labelSetId):
    '''
    creates a new label group

    `name` is mandatory and checked for uniqueness against `labelSetId`;
    `dropDownDisplay` and `isMandatory` are optional booleans that
    default to False.

    Raises InvalidUsage (404) if the label set does not exist.
    '''
    labelSet = m.LabelSet.query.get(labelSetId)
    if not labelSet:
        raise InvalidUsage(
            _('label set {0} not found').format(labelSetId), 404)

    data = MyForm(
        Field('name', is_mandatory=True, validators=[
            validators.is_string,
            (check_label_group_name_uniqueness, (labelSetId, None)),
        ]),
        Field('dropDownDisplay', default=False, validators=[
            validators.is_bool,
        ]),
        Field('isMandatory', default=False, validators=[
            validators.is_bool,
        ]),
    ).get_data()

    # Attach the group to the label set it was validated against. The
    # form itself carries no labelSetId.
    # NOTE(review): assumes LabelGroup has a labelSetId column -- confirm.
    labelGroup = m.LabelGroup(labelSetId=labelSetId, **data)
    SS.add(labelGroup)
    SS.flush()
    return jsonify({
        'message': _('created label group {0} successfully').format(
            labelGroup.name),
        'labelGroup': m.LabelGroup.dump(labelGroup),
    })
def disable_tag_for_sub_task(subTaskId, tagId):
    '''
    Disables a tag for a sub task by making sure a ShadowedTag record
    exists for the pair.

    The same confirmation message is returned whether or not a record
    had to be created.
    '''
    existing = m.ShadowedTag.query.get((subTaskId, tagId))
    if not existing:
        SS.add(m.ShadowedTag(subTaskId=subTaskId, tagId=tagId))
    text = _('tag {0} has been disabled for sub task {1}')
    return jsonify(message=text.format(tagId, subTaskId))
def disable_label_for_sub_task(subTaskId, labelId):
    '''
    Disables a label for a sub task by making sure a ShadowedLabel
    record exists for the pair.

    The same confirmation message is returned whether or not a record
    had to be created.
    '''
    existing = m.ShadowedLabel.query.get((subTaskId, labelId))
    if not existing:
        SS.add(m.ShadowedLabel(subTaskId=subTaskId, labelId=labelId))
    text = _('label {0} has been disabled for sub task {1}')
    return jsonify(message=text.format(labelId, subTaskId))
def main(taskId=None):
    '''
    Runs collapse_payable_events() and, if it fails, logs the traceback
    and rolls the session back.

    `taskId` only appears in the debug log line; it is not passed on.
    '''
    logging.basicConfig(level=logging.DEBUG)
    log.debug('collapsing payable events, taskId={}'.format(taskId))
    try:
        collapse_payable_events()
    except Exception:
        log.error(traceback.format_exc())
        SS.rollback()
def save_record(self):
    '''
    Stores this message as an SnsMessageRecord (id, type and body) and
    flushes the session.

    Raises RuntimeError if a record with the same MessageId exists.
    '''
    message_id = self.MessageId
    already_saved = m.SnsMessageRecord.query.get(message_id)
    if already_saved:
        raise RuntimeError(
            _('message {} has been saved already').format(message_id))

    SS.add(m.SnsMessageRecord(
        messageId=message_id,
        messageType=self.Type,
        body=self.body,
    ))
    SS.flush()
    current_app.logger.debug(
        'record of message {} has been saved'.format(message_id))
def update_label(labelSetId, labelId):
    '''
    updates label settings

    Only submitted fields whose value differs from the stored one are
    written; the response lists exactly those fields as `updatedFields`.

    Raises InvalidUsage (404) if the label set does not exist, or if the
    label does not exist or belongs to another label set.
    '''
    labelSet = m.LabelSet.query.get(labelSetId)
    if not labelSet:
        raise InvalidUsage(
            _('label set {0} not found').format(labelSetId), 404)
    label = m.Label.query.get(labelId)
    if not label or label.labelSetId != labelSetId:
        # was format(lableId): the typo raised NameError instead of 404
        raise InvalidUsage(_('label {0} not found').format(labelId), 404)

    data = MyForm(
        Field('name', validators=[
            (check_label_name_uniqueness, (labelSetId, labelId)),
        ]),
        Field('description'),
        Field('shortcutKey', validators=[
            (validators.is_string, (), dict(length=1)),
            check_label_shortcut_key_non_space,
            (check_label_shortcut_key_uniqueness, (labelSetId, labelId)),
        ]),
        Field('extract', validators=[
            validators.non_blank,
            (check_label_extract_uniqueness, (labelSetId, labelId)),
        ]),
        Field('labelGroupId', validators=[
            (check_label_group_existence, (labelSetId, )),
        ]),
        Field('enabled', validators=[
            validators.is_bool,
        ]),
    ).get_data()
    # data['labelSetId'] = labelSetId

    # Iterate over a snapshot of the keys: unchanged fields are removed
    # from `data` inside the loop.
    for key in list(data.keys()):
        value = data[key]
        if getattr(label, key) != value:
            setattr(label, key, value)
        else:
            del data[key]
    SS.flush()
    return jsonify({
        'message': _('updated label {0} successfully').format(labelId),
        'updatedFields': list(data.keys()),
        'label': m.Label.dump(label),
    })
def filter_unused():
    '''
    Returns the query rows holding the distinct userIds of non-removed
    task workers on translation tasks whose status is active or
    disabled.
    '''
    wanted_statuses = [m.Task.STATUS_ACTIVE, m.Task.STATUS_DISABLED]
    translation_task_ids = SS.query(m.Task.taskId).filter_by(
        taskType=m.TaskType.TRANSLATION).filter(
        m.Task.status.in_(wanted_statuses))

    workers = SS.query(m.TaskWorker.userId.distinct()).filter(
        m.TaskWorker.removed == False).filter(
        m.TaskWorker.taskId.in_(translation_task_ids))
    return workers.all()
def action_assign_task_supervisor(userIds, taskId):
    '''
    Makes every user in `userIds` a supervisor of task `taskId`, skipping
    users who already have a TaskSupervisor record for it.

    Returns a dict with a confirmation `message` and an external `link`
    to the supervisors section of the task configuration view.
    '''
    for userId in userIds:
        if m.TaskSupervisor.query.get((taskId, userId)):
            continue
        SS.add(m.TaskSupervisor(taskId=taskId, userId=userId))

    total = len(userIds)
    if total == 1:
        template = 'Assigned the user as a supervisor of task {0}'
    else:
        template = 'Assigned {1} users as supervisors of task {0}'
    message = _(template).format(taskId, total)

    url = url_for('views.task_config', taskId=taskId,
        _anchor='supervisors', _external=True)
    return {'message': message, 'link': url}
def filter_qa_severity(task, isMoreThan, score, isCorrect):
    '''
    Returns the set of rawPieceIds of `task` whose QA entry (one per
    qaedEntryId, ordered by creation time descending) passes a threshold
    on the summed severity of its applied errors.

    With E the summed severity of the QA entry:

    - isMoreThan, isCorrect:         (1 - E) > score, or E is missing
    - isMoreThan, not isCorrect:     E > score
    - not isMoreThan, isCorrect:     (1 - E) < score, missing E counts as 0
    - not isMoreThan, not isCorrect: E < score, or E is missing

    `isMoreThan` and `isCorrect` must each be MyFilter.TRUE or
    MyFilter.FALSE. Raises ValueError for any other value, or if `score`
    is not a number.
    '''
    # Explicit checks instead of assert, which is stripped under -O.
    if isMoreThan not in (MyFilter.TRUE, MyFilter.FALSE):
        raise ValueError(_('invalid option value: {}').format(isMoreThan))
    isMoreThan = isMoreThan == MyFilter.TRUE

    if isCorrect not in (MyFilter.TRUE, MyFilter.FALSE):
        raise ValueError(_('invalid option value: {}').format(isCorrect))
    isCorrect = isCorrect == MyFilter.TRUE

    try:
        score = float(score)
    except (TypeError, ValueError):
        raise ValueError(_('invalid score value: {}').format(score))

    if isMoreThan:
        if isCorrect:
            predicate = lambda qaErrorSum: (
                qaErrorSum is None or (1 - qaErrorSum) > score)
        else:
            predicate = lambda qaErrorSum: (
                qaErrorSum is not None and qaErrorSum > score)
    else:
        # was `if correct:`, an undefined name that raised NameError
        if isCorrect:
            predicate = lambda qaErrorSum: 1 - (qaErrorSum or 0) < score
        else:
            predicate = lambda qaErrorSum: (
                qaErrorSum is None or qaErrorSum < score)

    # latest QA result
    q1 = SS.query(
        m.WorkEntry.entryId,
        m.WorkEntry.qaedEntryId,
        m.WorkEntry.rawPieceId).distinct(
        m.WorkEntry.qaedEntryId).filter(
        m.WorkEntry.taskId == task.taskId).filter(
        m.WorkEntry.workType == m.WorkType.QA).order_by(
        m.WorkEntry.qaedEntryId, m.WorkEntry.created.desc())
    sub_q = q1.subquery('sub_q')

    stmt = SS.query(
        m.AppliedError.entryId,
        func.sum(m.AppliedError.severity).label('qaErrorSum')).group_by(
        m.AppliedError.entryId).subquery()

    # NOTE(review): this is an inner join, so QA entries without applied
    # errors produce no row and the "missing E" branches above look
    # unreachable -- confirm whether an outer join was intended.
    q = SS.query(sub_q.c.rawPieceId, stmt.c.qaErrorSum).join(
        stmt, stmt.c.entryId == sub_q.c.entryId)
    return set(r.rawPieceId for r in q.all() if predicate(r.qaErrorSum))
def save_headwords(taskId):
    '''
    Stores the submitted `headwords` as new raw pieces of a Spelling
    task, all under one newly created load.

    Each headword is a mapping whose items are passed to RawPiece as
    keyword arguments; a `meta` key, if present, is dropped first.

    Raises InvalidUsage (404) if the task does not exist, and
    InvalidUsage if its task type is not 'Spelling'.
    '''
    task = m.Task.query.get(taskId)
    if not task:
        raise InvalidUsage(_('task {0} not found').format(taskId), 404)
    if task.taskType != 'Spelling':
        raise InvalidUsage(
            _('task {0} has unexpected task type').format(taskId))

    data = MyForm(Field(
        'headwords',
        is_mandatory=True,
    )).get_data()

    # NOTE(review): createdBy is hard-coded to user 699 -- confirm whether
    # the current session user should be used instead.
    load = m.Load(taskId=taskId, createdBy=699)
    SS.add(load)
    # flush before load.loadId is read below (presumably assigned on
    # insert)
    SS.flush()

    # identical for every piece of this load, so computed once
    allocationContext = 'L_%05d' % load.loadId

    rawPieces = []
    for i, r in enumerate(data['headwords']):
        assemblyContext = 'L_%05d_%05d' % (load.loadId, i)
        r.pop('meta', None)
        rawPiece = m.RawPiece(taskId=taskId, loadId=load.loadId,
            assemblyContext=assemblyContext,
            allocationContext=allocationContext,
            words=1, **r)
        rawPieces.append(rawPiece)
        SS.add(rawPiece)
    SS.flush()
    return jsonify(rawPieces=m.RawPiece.dump(rawPieces))
def create_new_alphabet_rule(alphabetId):
    '''
    creates a new rule for an alphabet

    The optional form fields `name`, `type` and `description` are copied
    onto the new rule.

    Raises InvalidUsage (404) if the alphabet does not exist.
    '''
    if not m.Alphabet.query.get(alphabetId):
        raise InvalidUsage(
            _('alphabet {0} not found').format(alphabetId), 404)

    form = MyForm(
        Field('name'),
        Field('type'),
        Field('description'),
    )
    new_rule = m.Rule(**form.get_data())
    new_rule.alphabetId = alphabetId
    SS.add(new_rule)
    SS.flush()
    return jsonify(rule=m.Rule.dump(new_rule))
def delete_sub_task_qa_settings(subTaskId):
    '''
    deletes the default QA settings of a sub task, if it has any

    The response message says whether settings were deleted or there
    were none to delete.

    Raises InvalidUsage (404) if the sub task does not exist.
    '''
    subTask = m.SubTask.query.get(subTaskId)
    if not subTask:
        # report a missing resource as 404
        raise InvalidUsage(
            _('sub task {0} not found').format(subTaskId), 404)

    if subTask.qaConfig:
        SS.delete(subTask.qaConfig)
        message = _('deleted default QA settings of sub task {0}').format(
            subTaskId)
    else:
        message = _(
            'sub task {0} does not have default QA settings').format(
            subTaskId)
    return jsonify({
        'message': message,
    })
def filter_allocation_context(task, text):
    '''
    Returns the set of rawPieceIds of `task` whose allocationContext
    equals `text` exactly.
    '''
    rows = SS.query(m.RawPiece.rawPieceId).filter(
        m.RawPiece.taskId == task.taskId).filter(
        m.RawPiece.allocationContext == text).all()
    return set(row.rawPieceId for row in rows)
def select(selection):
    '''
    Evaluates `selection` and returns a list of matching rawPieceIds.

    Starts from every raw piece of the selection's task. Filters are
    grouped by (isInclusive, filterType): results within one group are
    OR-ed together, each inclusive group is intersected with the running
    result, and each exclusive group is subtracted from it.

    When selection.limit is not None, a random sample of at most that
    many ids is returned; otherwise the ids are returned sorted.

    Raises ValueError if the selection has no taskId.
    '''
    # builtin on Python 2, only in functools on Python 3
    from functools import reduce

    # TODO: implement this
    taskId = selection.taskId
    # validate before touching the database
    if taskId is None:
        raise ValueError(_('must specify taskId'))
    task = m.Task.query.get(taskId)

    filters = {
        True: {},    # inclusive
        False: {},   # exclusive
    }
    for f in selection.filters:
        filters[f.isInclusive].setdefault(f.filterType, []).append(f)

    rs = set(
        r.rawPieceId for r in SS.query(m.RawPiece.rawPieceId).filter(
            m.RawPiece.taskId == taskId))

    for fs in filters[True].values():
        result = reduce(operator.or_, [MyFilter.run(f, task) for f in fs])
        rs &= result

    for fs in filters[False].values():
        result = reduce(operator.or_, [MyFilter.run(f, task) for f in fs])
        rs -= result

    rs = sorted(rs)
    if selection.limit is not None:
        limit = min(selection.limit, len(rs))
        rs = random.sample(rs, limit)
    return rs
def filter_sub_task_work(task, workOption, subTaskId):
    '''
    Returns the set of rawPieceIds of `task` whose work entries match
    sub task `subTaskId` under the given work option:

    - MyFilter.ANY: any work entry made in the sub task
    - MyFilter.FIRST: one entry per raw piece, ordered by creation time
      ascending, must be from the sub task
    - MyFilter.MOST_RECENT: one entry per raw piece, ordered by creation
      time descending, must be from the sub task
    - MyFilter.MOST_RECENT_MODIFIED: as MOST_RECENT, but only entries
      with modifiesTranscription set are considered

    Returns an empty set if the sub task does not exist or belongs to
    another task.
    Raises ValueError if `subTaskId` is not an integer or `workOption`
    is not one of the options above.
    '''
    try:
        subTaskId = int(subTaskId)
    except (TypeError, ValueError):
        raise ValueError(_('invalid sub task id: {}').format(subTaskId))

    subTask = m.SubTask.query.get(subTaskId)
    if not subTask or subTask.taskId != task.taskId:
        return set()

    inner = SS.query(m.WorkEntry.rawPieceId, m.WorkEntry.subTaskId).distinct(
        m.WorkEntry.rawPieceId).filter(m.WorkEntry.taskId == task.taskId)

    if workOption == MyFilter.ANY:
        inner = inner.filter(m.WorkEntry.subTaskId == subTaskId)
    elif workOption == MyFilter.FIRST:
        inner = inner.order_by(m.WorkEntry.rawPieceId, m.WorkEntry.created)
    elif workOption == MyFilter.MOST_RECENT:
        inner = inner.order_by(m.WorkEntry.rawPieceId,
            m.WorkEntry.created.desc())
    elif workOption == MyFilter.MOST_RECENT_MODIFIED:
        inner = inner.filter(
            m.WorkEntry.modifiesTranscription.is_(True)).order_by(
            m.WorkEntry.rawPieceId, m.WorkEntry.created.desc())
    else:
        # an unknown option used to fall through to an unordered query
        raise ValueError(_('invalid work option: {}').format(workOption))

    sub_q = inner.subquery('sub_q')
    sel_stmt = select([sub_q.c.rawPieceId], distinct=True,
        from_obj=sub_q).where(sub_q.c.subTaskId == subTaskId)
    return set(r.rawPieceId for r in SS.bind.execute(sel_stmt))
def unassign_batch(batchId):
    '''
    Un-assigns a batch from its current user, if it has one.

    The response message says whether the batch was un-assigned or was
    not assigned to anyone in the first place.

    Raises InvalidUsage (404) if the batch does not exist.
    '''
    batch = m.Batch.query.get(batchId)
    if not batch:
        raise InvalidUsage(_('batch {0} not found').format(batchId), 404)

    # TODO: check policy
    # TODO: update history?
    if batch.userId is None:
        message = _('batch {0} is not assigned to anyone').format(batchId)
    else:
        batch.unassign()
        message = _('batch {0} has been un-assigned').format(batchId)

    SS.flush()
    return jsonify({
        'message': message,
        'batch': m.Batch.dump(batch),
    })
def filter_label(task, labelId):
    '''
    Returns the set of rawPieceIds of `task` whose work entry (one per
    raw piece among those with modifiesTranscription set, ordered by
    creation time descending) has an applied label.

    `labelId` is either MyFilter.ANY, meaning any applied label matches,
    or the id of the one label that must be applied.

    Raises ValueError if `labelId` is neither MyFilter.ANY nor an
    integer.
    '''
    if labelId == MyFilter.ANY:
        labelId = None
    else:
        try:
            labelId = int(labelId)
        except (TypeError, ValueError):
            raise ValueError(_('invalid label id: {}').format(labelId))

    inner = SS.query(
        m.WorkEntry.rawPieceId.label('rawPieceId'),
        m.WorkEntry.entryId.label('entryId')).distinct(
        m.WorkEntry.rawPieceId).filter(
        m.WorkEntry.taskId == task.taskId).filter(
        m.WorkEntry.modifiesTranscription).order_by(
        m.WorkEntry.rawPieceId, m.WorkEntry.created.desc())
    sub_q = inner.subquery('sub_q')

    q = sub_q.join(m.AppliedLabel)
    sel_stmt = select([sub_q.c.rawPieceId], distinct=True, from_obj=q)
    if labelId is not None:
        sel_stmt = sel_stmt.where(m.AppliedLabel.labelId == labelId)
    return set(r.rawPieceId for r in SS.bind.execute(sel_stmt))
def filter_word_count(task, wordCountOption, words):
    '''
    Returns the set of rawPieceIds of `task` whose work entry result
    (one per raw piece among those with modifiesTranscription set,
    ordered by creation time descending) has a word count that compares
    to `words` as `wordCountOption` requires.

    Words are counted by splitting Converter.asExtract(result) on
    whitespace. `wordCountOption` is MyFilter.EQUALS,
    MyFilter.GREATER_THAN or MyFilter.LESS_THAN.

    Raises ValueError if `words` is not an integer or `wordCountOption`
    is not one of the options above.
    '''
    try:
        words = int(words)
    except (TypeError, ValueError):
        raise ValueError(_('invalid words: {}').format(words))

    def count_words(t):
        extractText = Converter.asExtract(t)
        return len(extractText.split())

    if wordCountOption == MyFilter.EQUALS:
        func_ok = lambda t: count_words(t) == words
    elif wordCountOption == MyFilter.GREATER_THAN:
        func_ok = lambda t: count_words(t) > words
    elif wordCountOption == MyFilter.LESS_THAN:
        func_ok = lambda t: count_words(t) < words
    else:
        # func_ok used to stay undefined here and fail with NameError
        raise ValueError(
            _('invalid word count option: {}').format(wordCountOption))

    q = SS.query(
        m.WorkEntry.rawPieceId,
        m.WorkEntry.result).distinct(m.WorkEntry.rawPieceId).filter(
        m.WorkEntry.taskId == task.taskId).filter(
        m.WorkEntry.modifiesTranscription).order_by(
        m.WorkEntry.rawPieceId, m.WorkEntry.created.desc())
    return set(r.rawPieceId for r in q.all() if func_ok(r.result))
def check_get_policy(subTask, user):
    '''
    Checks the sub task's get policy for `user`.

    Returns None when the user passes the check, otherwise a message
    saying why not. Only POLICY_ONE_ONLY can fail: it fails when the
    user has work entries in this sub task whose batchId is not among
    the sub task's current Batch records. Every other policy value,
    including POLICY_NO_LIMIT, passes.
    '''
    if subTask.getPolicy != m.SubTask.POLICY_ONE_ONLY:
        return None

    # check if user has submitted any batch
    existing_batch_ids = SS.query(m.Batch.batchId).filter(
        m.Batch.subTaskId == subTask.subTaskId)
    submitted = SS.query(m.WorkEntry.batchId.distinct()).filter(
        m.WorkEntry.subTaskId == subTask.subTaskId).filter(
        m.WorkEntry.userId == user.userId).filter(
        m.WorkEntry.batchId.notin_(existing_batch_ids))
    if submitted.count() > 0:
        return _('user has done work on this sub task before').format()
    return None
def decorated(*args, **kwargs): ''' api handlers generally return responses with mimetype set to json, this can be changed by returning a response instead (e.g. froma file download handler). ''' try: result = fn(*args, **kwargs) if isinstance(result, dict): resp = jsonify(result) elif isinstance(result, Response): resp = result else: raise RuntimeError, 'unexpected datatype returned from api handler' except InvalidUsage, e: resp = make_response(jsonify(e.to_dict()), e.status_code, {}) SS.rollback()
# JSON response. # resp = make_response(jsonify({'error': '%s' % e}), e.code, {}) SS.rollback() except Exception, e: # # Oops! Caught unhandled exception, log what happend # and return an error response to client # out = cStringIO.StringIO() traceback.print_exc(file=out) current_app.logger.error('\033[1;31mERROR caught inside api:\033[0m\n%s\n' % out.getvalue()) # TODO: hide debug information for production deployment resp = make_response((jsonify({'error': '%s' % e}), 500, {})) SS.rollback() else: SS.commit() return resp return decorated def caps(*caps): def customized_decorator(fn): @wraps(fn) def decorated(*args, **kwargs): user = session['current_user'] missing = set(caps) - set(getattr(user, 'caps', set())) if missing: raise InvalidUsage( _('not enough capabilities to perform requested operation'),