Example #1
0
def _validate_language_codes(codes):
    """
    Ensures that every given language code passes validation.

    Parameters:
    - codes:tuple holds the language codes to check.

    Raises:
    - CommandError for the first code that fails validation.
    """
    for candidate in codes:
        # Guard clause: valid codes need no further handling.
        if validate_language_code(candidate):
            continue

        message = '{0!r} contains invalid language code!'.format(candidate)
        raise CommandError(message)
Example #2
0
#
# TASKS_TO_ANNOTATORS = {
#     ('deu', 'ces') : _create_uniform_task_map(12, 24, REDUNDANCY),
# }
# The commented-out literal above illustrates the shape of one entry: a
# pair of language codes (presumably (source, target) -- confirm) mapped
# to the result of _create_uniform_task_map().
#
# Start from an empty mapping; the loops further down add the entries.
TASKS_TO_ANNOTATORS = {}

# Campaign settings.
# NOTE(review): presumably consumed by code outside this excerpt -- confirm.
CAMPAIGN_URL = 'http://wmt19.appraise.cf/dashboard/sso/'
CAMPAIGN_NAME = 'WMT19DocSrcDACrowd2'
CAMPAIGN_KEY = 'WMT19DocSrcDACrowd2'
CAMPAIGN_NO = 230
ANNOTATORS = None  # Will be determined by TASKS_TO_ANNOTATORS mapping
# NOTE(review): presumably derived later, like ANNOTATORS -- confirm.
TASKS = None
# Passed as the third argument to every _create_uniform_task_map() call below.
REDUNDANCY = 1

# Fail fast: check every configured entry before building any task map,
# raising ValueError for the first one validate_language_code() rejects.
for code in EX_LANGUAGES + XE_LANGUAGES + XY_LANGUAGES:
    if not validate_language_code(code):
        _msg = '{0!r} contains invalid language code!'.format(code)
        raise ValueError(_msg)

# One entry per EX_LANGUAGES code, keyed by ('eng', code).
# NOTE(review): the example above passes 12 and 24 as the first two
# arguments (presumably annotator and task counts); here both are 0 --
# confirm this is intended.
for ex_code in EX_LANGUAGES:
    TASKS_TO_ANNOTATORS[('eng', ex_code)] = _create_uniform_task_map(
        0, 0, REDUNDANCY)

# One entry per XE_LANGUAGES code, keyed by (code, 'eng').
for xe_code in XE_LANGUAGES:
    TASKS_TO_ANNOTATORS[(xe_code,
                         'eng')] = _create_uniform_task_map(0, 0, REDUNDANCY)

# XY_LANGUAGES entries are used directly as keys.
# NOTE(review): presumably each entry is already a language-code pair; if
# so, the validation loop above hands that whole pair to
# validate_language_code() as a single code -- confirm that is supported.
for xy_code in XY_LANGUAGES:
    TASKS_TO_ANNOTATORS[xy_code] = _create_uniform_task_map(0, 0, REDUNDANCY)

TASKS_TO_ANNOTATORS = {
Example #3
0
def _validate_tasks_to_annotators_map(tasks_to_annotators, redundancy):
    '''Validates TASKS_TO_ANNOTATORS data.

    Description:
        This should be a list of lists, like this:
            "TASKS_TO_ANNOTATORS": [
                ["eng", "trk", "uniform", 18, 36],
                ["trk", "eng", "uniform", 18, 36]
            ]

        Each inner list should have five values:
            1. str: source language code
            2. str: target language code
            3. str: task map setup mode
            4. int: number of annotators
            5. int: number of tasks

        Boolean values are not accepted for the two int fields.

        Currently, the only supported task map setup mode is "uniform"
        (compared case-insensitively); this requires the following
        invariant:

            annotators * 2 * redundancy == tasks

    Parameters:
    - tasks_to_annotators:list contains TASKS_TO_ANNOTATORS items;
    - redundancy:int specifies campaign redundancy.

    Raises:
    - ValidationError in case of invalid manifest data.

    Returns:
    - True if validation is successful.
    '''
    if not isinstance(tasks_to_annotators, list):
        raise ValidationError(
            "manifest.json key 'TASKS_TO_ANNOTATORS' should have "
            'list type, is {0!r}'.format(tasks_to_annotators))

    # Validate items in TASKS_TO_ANNOTATORS
    for item in tasks_to_annotators:
        if not isinstance(item, list):
            raise ValidationError(
                "manifest.json key 'TASKS_TO_ANNOTATORS' list "
                'item should have list type, is {0!r}'.format(item))

        if len(item) != 5:
            raise ValidationError(
                "manifest.json key 'TASKS_TO_ANNOTATORS' list "
                'item should be 5-tuple, is {0!r}'.format(item))

        source_code, target_code, mode, annotators, tasks = item

        # Validate correct item type signature: <str, str, str, int, int>
        has_str_fields = all(
            isinstance(x, str) for x in (source_code, target_code, mode))
        # bool is a subclass of int, so it has to be excluded explicitly;
        # otherwise JSON true/false would be accepted as a count.
        has_int_fields = all(
            isinstance(x, int) and not isinstance(x, bool)
            for x in (annotators, tasks))
        if not (has_str_fields and has_int_fields):
            raise ValidationError(
                "manifest.json key 'TASKS_TO_ANNOTATORS' list "
                'item should have <str, str, str, int, int> '
                'signature, is {0!r}'.format(item))

        # Validate that source_code/target_code are valid language codes.
        # A list (not a generator) is used so both codes are always checked.
        valid_language_codes = [
            validate_language_code(x) for x in (source_code, target_code)
        ]
        if not all(valid_language_codes):
            raise ValidationError(
                "manifest.json key 'TASKS_TO_ANNOTATORS' list item "
                'has invalid language codes, check {0!r}'.format(item))

        # Validate mode is set to "uniform" -- which is the only task
        # map creation mode currently supported. For "uniform" mode, we
        # also require that: annotators * 2 * redundancy == tasks.
        if mode.lower() != 'uniform':
            # The trailing space keeps the two adjacent literals from
            # running together as "onlysupports".
            raise ValidationError(
                "manifest.json key 'TASKS_TO_ANNOTATORS' list item only "
                'supports "uniform" mode, check {0!r}'.format(item))

        expected = annotators * 2 * redundancy
        if expected != tasks:
            raise ValidationError(
                "manifest.json key 'TASKS_TO_ANNOTATORS' list item has "
                'bad task map ({0} * 2 * {1} != {2}), check {3!r}'.format(
                    annotators, redundancy, tasks, item))

    return True