Beispiel #1
0
def _compute_language_pair_stats():
    """
    Computes HIT statistics per language pair.

    Returns a list with one tuple per entry of LANGUAGE_PAIR_CHOICES:

        (name, number of unique systems,
         (remaining HITs, remaining percentage),
         (completed HITs, completed percentage))
    """
    stats = []

    # TODO: move LANGUAGE_PAIR_CHOICES to a better place.
    for choice in LANGUAGE_PAIR_CHOICES:
        pair_code, pair_name = choice[0], choice[1]

        # Keep this call ahead of the completed-HITs query: running
        # compute_remaining_hits() will also update completion status
        # for HITs.
        remaining = HIT.compute_remaining_hits(language_pair=pair_code)
        completed_hits = HIT.objects.filter(
            completed=True, mturk_only=False, language_pair=pair_code)

        # Collect every system name found in the results of completed HITs.
        # The 'system' value may hold several comma-separated names.
        systems = set()
        for hit in completed_hits:
            for result in RankingResult.objects.filter(item__hit=hit):
                for translation in result.item.translations:
                    systems.update(translation[1]['system'].split(','))

        LOGGER.info(systems)

        completed = completed_hits.count()
        total = remaining + completed
        # Guard against division by zero when there are no HITs at all.
        divisor = float(total or 1)

        stats.append((
            pair_name,
            len(systems),
            (remaining, 100 * remaining / divisor),
            (completed, 100 * completed / divisor),
        ))

    return stats
Beispiel #2
0
def _compute_language_pair_stats():
    """
    Computes HIT statistics per language pair.

    For every (code, name, ...) entry in LANGUAGE_PAIR_CHOICES one tuple is
    produced: the pair name, the number of distinct systems seen in results
    of completed HITs, and (count, percentage) pairs for remaining and for
    completed HITs.
    """
    # TODO: move LANGUAGE_PAIR_CHOICES to a better place.
    language_pair_stats = []

    for entry in LANGUAGE_PAIR_CHOICES:
        code = entry[0]
        name = entry[1]

        # Running compute_remaining_hits() will also update completion
        # status for HITs, so it has to happen before the query below.
        remaining_count = HIT.compute_remaining_hits(language_pair=code)
        completed_queryset = HIT.objects.filter(completed=True,
                                                mturk_only=False,
                                                language_pair=code)

        # A translation's 'system' value can list several systems separated
        # by commas; the set keeps each name once.
        unique_systems = {
            system
            for hit in completed_queryset
            for result in RankingResult.objects.filter(item__hit=hit)
            for translation in result.item.translations
            for system in translation[1]['system'].split(',')
        }
        LOGGER.info(unique_systems)

        completed_count = completed_queryset.count()
        total_count = remaining_count + completed_count

        def _percentage(count):
            # "or 1" avoids a division by zero for pairs without any HITs.
            return 100 * count / float(total_count or 1)

        language_pair_stats.append(
            (name, len(unique_systems),
             (remaining_count, _percentage(remaining_count)),
             (completed_count, _percentage(completed_count))))

    return language_pair_stats
Beispiel #3
0
 Author: Christian Federmann <*****@*****.**>

usage: export_wmt16_status.py

Exports HIT status for all language pairs.

"""
from datetime import datetime
import os
import sys

if __name__ == "__main__":
    # Django needs DJANGO_SETTINGS_MODULE before any model gets imported.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'

    # Put the parent of the current working directory on the import path.
    PROJECT_HOME = os.path.normpath(os.getcwd() + "/..")
    sys.path.append(PROJECT_HOME)

    # The import is deferred to this point because it only works once
    # PROJECT_HOME has been added to sys.path.
    from appraise.wmt16.models import HIT, LANGUAGE_PAIR_CHOICES

    # Map each language pair code to its number of remaining HITs.
    remaining_hits = {
        choice[0]: HIT.compute_remaining_hits(language_pair=choice[0])
        for choice in LANGUAGE_PAIR_CHOICES
    }

    # Report: blank line, timestamp header, one line per pair, blank line.
    print()
    print('[{0}]'.format(datetime.now().strftime("%c")))
    for pair_code, hit_count in remaining_hits.items():
        print('{0}: {1:03d}'.format(pair_code, hit_count))
    print()
Beispiel #4
0
def _compute_global_stats():
    """
    Computes some global statistics for the WMT16 evaluation campaign.

    Side effects: active, non-MTurk HITs which are not yet marked complete
    but have at least one user are flagged as completed and saved, and a
    status snapshot is recorded through
    TimedKeyValueData.update_status_if_changed().

    Returns a list of (label, value) tuples.
    """
    global_stats = []

    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False, completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True, mturk_only=False, completed=False):
        if hit.users.count() >= 1:
            hits_completed += 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(
      item__hit__completed=True, item__hit__mturk_only=False)

    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        # TODO: this implicitly counts A=B comparisons for multi systems.
        # Basically, inflating the number of pairwise comparisons... Fix!
        #
        # n systems give n! / ((n - 2)! * 2) == n * (n - 1) / 2 pairs.  Floor
        # division keeps the counter an integer on Python 3 as well; true
        # division would turn it into a float and render as e.g. "1,234.0".
        # Results with two or fewer systems contribute nothing, as before.
        if result.systems > 2:
            system_comparisons += result.systems * (result.systems - 1) // 2

    # Aggregate information about participating groups.
    groups = set()
    for user in wmt16_users:
        for group in _identify_groups_for_user(user):
            groups.add(group)

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    # "or 1" protects against division by zero when nothing is completed.
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the divisor assumes three tasks per HIT -- confirm.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    users_count = len(wmt16_users)
    groups_count = len(groups)
    results_count = ranking_results.count()
    duration_per_hit = seconds_to_timedelta(avg_time)
    duration_per_task = seconds_to_timedelta(avg_user_time)
    duration_total = seconds_to_timedelta(total_time)

    global_stats.append(('Users', users_count))
    global_stats.append(('Groups', groups_count))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(results_count)))
    global_stats.append(('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', duration_per_hit))
    global_stats.append(('Average duration (per task)', duration_per_task))
    global_stats.append(('Total duration', duration_total))

    # Create new status data snapshot
    TimedKeyValueData.update_status_if_changed('users', str(users_count))
    TimedKeyValueData.update_status_if_changed('groups', str(groups_count))
    TimedKeyValueData.update_status_if_changed('hits_completed', str(hits_completed))
    TimedKeyValueData.update_status_if_changed('hits_remaining', str(hits_remaining))
    TimedKeyValueData.update_status_if_changed('ranking_results', str(results_count))
    TimedKeyValueData.update_status_if_changed('system_comparisons', str(system_comparisons))
    TimedKeyValueData.update_status_if_changed('duration_per_hit', str(duration_per_hit))
    TimedKeyValueData.update_status_if_changed('duration_per_task', str(duration_per_task))
    TimedKeyValueData.update_status_if_changed('duration_total', str(duration_total))

    return global_stats
usage: export_wmt16_status.py

Exports HIT status for all language pairs.

"""
from datetime import datetime
import os
import sys


if __name__ == "__main__":
    # Properly set DJANGO_SETTINGS_MODULE environment variable.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
    PROJECT_HOME = os.path.normpath(os.getcwd() + "/..")
    sys.path.append(PROJECT_HOME)

    # We have just added appraise to the system path list, hence this works.
    from appraise.wmt16.models import HIT, LANGUAGE_PAIR_CHOICES

    # Map each language pair code to its number of remaining HITs.
    remaining_hits = {}
    for language_pair in [x[0] for x in LANGUAGE_PAIR_CHOICES]:
        remaining_hits[language_pair] = HIT.compute_remaining_hits(
          language_pair=language_pair)

    # Each print call takes exactly one parenthesised argument, which is
    # valid and prints the same text on Python 2 and Python 3.  The former
    # print statements were a SyntaxError on Python 3, and a bare `print`
    # would silently print nothing there; print('') emits the blank line on
    # both versions.
    print('')
    print('[{0}]'.format(datetime.now().strftime("%c")))
    for k, v in remaining_hits.items():
        print('{0}: {1:03d}'.format(k, v))
    print('')
Beispiel #6
0
def _compute_global_stats():
    """
    Computes some global statistics for the WMT16 evaluation campaign.

    Besides building the statistics, this flags active, non-MTurk HITs that
    have at least one user but are not yet marked complete as completed
    (and saves them), and records a status snapshot through
    TimedKeyValueData.update_status_if_changed().

    Returns a list of (label, value) tuples.
    """
    global_stats = []

    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False,
                                        completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True,
                                  mturk_only=False,
                                  completed=False):
        if hit.users.count() >= 1:
            hits_completed += 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(item__hit__completed=True,
                                                   item__hit__mturk_only=False)

    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        # TODO: this implicitly counts A=B comparisons for multi systems.
        # Basically, inflating the number of pairwise comparisons... Fix!
        #
        # The number of pairs among n systems is n * (n - 1) / 2, which is
        # what n! / ((n - 2)! * 2) reduces to.  Floor division is exact here
        # and keeps the total an int; with true division (Python 3) the
        # total became a float and was reported with a trailing ".0".
        # Results with two or fewer systems contribute nothing, as before.
        if result.systems > 2:
            system_comparisons += (result.systems *
                                   (result.systems - 1) // 2)

    # Aggregate information about participating groups.
    groups = set()
    for user in wmt16_users:
        for group in _identify_groups_for_user(user):
            groups.add(group)

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    # "or 1" protects against division by zero when nothing is completed.
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the divisor assumes three tasks per HIT -- confirm.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    users_count = len(wmt16_users)
    groups_count = len(groups)
    results_count = ranking_results.count()
    duration_per_hit = seconds_to_timedelta(avg_time)
    duration_per_task = seconds_to_timedelta(avg_user_time)
    duration_total = seconds_to_timedelta(total_time)

    global_stats.append(('Users', users_count))
    global_stats.append(('Groups', groups_count))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(results_count)))
    global_stats.append(
        ('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', duration_per_hit))
    global_stats.append(('Average duration (per task)', duration_per_task))
    global_stats.append(('Total duration', duration_total))

    # Create new status data snapshot
    TimedKeyValueData.update_status_if_changed('users', str(users_count))
    TimedKeyValueData.update_status_if_changed('groups', str(groups_count))
    TimedKeyValueData.update_status_if_changed('hits_completed',
                                               str(hits_completed))
    TimedKeyValueData.update_status_if_changed('hits_remaining',
                                               str(hits_remaining))
    TimedKeyValueData.update_status_if_changed('ranking_results',
                                               str(results_count))
    TimedKeyValueData.update_status_if_changed('system_comparisons',
                                               str(system_comparisons))
    TimedKeyValueData.update_status_if_changed('duration_per_hit',
                                               str(duration_per_hit))
    TimedKeyValueData.update_status_if_changed('duration_per_task',
                                               str(duration_per_task))
    TimedKeyValueData.update_status_if_changed('duration_total',
                                               str(duration_total))

    return global_stats
Beispiel #7
0
def _compute_global_stats():
    """
    Computes some global statistics for the wmt16 evaluation campaign.

    As a side effect, active, non-MTurk HITs which are not yet marked
    complete but have at least one user are flagged as completed and saved.

    Returns a list of (label, value) tuples.
    """
    global_stats = []

    # Users are the members of the first group named 'wmt16', if any exists.
    wmt16_group = Group.objects.filter(name='wmt16')
    wmt16_users = []
    if wmt16_group.exists():
        wmt16_users = wmt16_group[0].user_set.all()

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False, completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True, mturk_only=False, completed=False):
        if hit.users.count() >= 1:
            hits_completed += 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(
      item__hit__completed=True, item__hit__mturk_only=False)

    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        # n systems give n! / ((n - 2)! * 2) == n * (n - 1) / 2 pairs.  Floor
        # division keeps the counter an integer on Python 3 as well; true
        # division would turn it into a float and render as e.g. "1,234.0".
        # Results with two or fewer systems contribute nothing, as before.
        if result.systems > 2:
            system_comparisons += result.systems * (result.systems - 1) // 2

    # Aggregate information about participating groups.  The campaign group
    # itself and groups named like 'eng2...' or '...2eng' are not counted.
    groups = set()
    for user in wmt16_users:
        for group in user.groups.all():
            if group.name == 'wmt16' or group.name.startswith('eng2') \
              or group.name.endswith('2eng'):
                continue

            groups.add(group)

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    # "or 1" protects against division by zero when nothing is completed.
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the divisor assumes three tasks per HIT -- confirm.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    global_stats.append(('Users', len(wmt16_users)))
    global_stats.append(('Groups', len(groups)))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(ranking_results.count())))
    global_stats.append(('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', seconds_to_timedelta(avg_time)))
    global_stats.append(('Average duration (per task)', seconds_to_timedelta(avg_user_time)))
    global_stats.append(('Total duration', seconds_to_timedelta(total_time)))

    return global_stats