def _validate_language_codes(codes): """ Checks that all language codes are valid. Parameters: - codes:tuple specifies language codes. Raises: - CommandError in case of invalid language code. """ for code in codes: if not validate_language_code(code): raise CommandError( '{0!r} contains invalid language code!'.format(code))
# # TASKS_TO_ANNOTATORS = { # ('deu', 'ces') : _create_uniform_task_map(12, 24, REDUNDANCY), # } TASKS_TO_ANNOTATORS = {} CAMPAIGN_URL = 'http://wmt19.appraise.cf/dashboard/sso/' CAMPAIGN_NAME = 'WMT19DocSrcDACrowd2' CAMPAIGN_KEY = 'WMT19DocSrcDACrowd2' CAMPAIGN_NO = 230 ANNOTATORS = None # Will be determined by TASKS_TO_ANNOTATORS mapping TASKS = None REDUNDANCY = 1 for code in EX_LANGUAGES + XE_LANGUAGES + XY_LANGUAGES: if not validate_language_code(code): _msg = '{0!r} contains invalid language code!'.format(code) raise ValueError(_msg) for ex_code in EX_LANGUAGES: TASKS_TO_ANNOTATORS[('eng', ex_code)] = _create_uniform_task_map( 0, 0, REDUNDANCY) for xe_code in XE_LANGUAGES: TASKS_TO_ANNOTATORS[(xe_code, 'eng')] = _create_uniform_task_map(0, 0, REDUNDANCY) for xy_code in XY_LANGUAGES: TASKS_TO_ANNOTATORS[xy_code] = _create_uniform_task_map(0, 0, REDUNDANCY) TASKS_TO_ANNOTATORS = {
def _validate_tasks_to_annotators_map(tasks_to_annotators, redundancy): '''Validates TASKS_TO_ANNOTATORS data. Description: This should be an array of arrays, like this: "TASKS_TO_ANNOTATORS": [ ["eng", "trk", "uniform", 18, 36], ["trk", "eng", "uniform", 18, 36] ] Each inner array should have five values: 1. str: source language code 2. str: target language code 3. str: task map setup mode 4. int: number of annotators 5. int: number of tasks Currently, the only supported task map setup mode is "uniform"; this requires the following invariant: annotators * 2 * redundancy == tasks Parameters: - tasks_to_annototators:dict contains TASKS_TO_ANNOTATORS dict; - redundancy:int specifies campaign redundancy. Raises: - ValidationError in case of missing manifest data. Returns: - True if validation is successful. ''' if not isinstance(tasks_to_annotators, list): raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' should have " 'list type, is {0!r}'.format(tasks_to_annotators)) # Validate items in TASKS_TO_ANNOTATORS for item in tasks_to_annotators: if not isinstance(item, list): raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' list " 'item should have list type, is {0!r}'.format(item)) if not len(item) == 5: raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' list " 'item should be 5-tuple, is {0!r}'.format(item)) source_code, target_code, mode, annotators, tasks = item # Vaidate correct item type signature: <str, str, str, int, int> correct_types = [ isinstance(x, str) for x in (source_code, target_code, mode) ] correct_types.extend([isinstance(x, int) for x in (annotators, tasks)]) if not all(correct_types): raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' list " 'item should have <str, str, str, int, int> ' 'signature, is {0!r}'.format(item)) # Validate that source_code/target_code are valid language codes valid_language_codes = [ validate_language_code(x) for x in (source_code, target_code) ] if not all(valid_language_codes): raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' list item " 'has invalid language codes, check {0!r}'.format(item)) # Validate mode is set to "uniform" -- which is the only task # map creation mode currently supported. For "uniform" mode, we # also require that: annotators * 2 * redundancy == tasks. if not mode.lower() == 'uniform': raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' list item only" 'supports "uniform" mode, check {0!r}'.format(item)) expected = annotators * 2 * redundancy if not expected == tasks: raise ValidationError( "manifest.json key 'TASKS_TO_ANNOTATORS' list item has " 'bad task map ({0} * 2 * {1} != {2}), check {3!r}'.format( annotators, redundancy, tasks, item)) return True