def _compute_global_stats():
    """
    Collects campaign-wide figures for the WMT14 evaluation campaign.

    Returns a list of (label, value) tuples in display order.

    Side effect: every active, non-MTurk HIT that has at least one user
    attached but is not yet flagged as completed is flagged and saved.

    """
    campaign_group = Group.objects.get(name='WMT14')
    campaign_users = campaign_group.user_set.all()

    # Start from the HITs that are already flagged as completed.
    completed_count = HIT.objects.filter(
        mturk_only=False, completed=True).count()

    # A HIT counts as completed once one or more annotators have worked on
    # it (previously three were required for greater overlap).  Flag the
    # active HITs that satisfy this but are not marked complete yet.
    pending_hits = HIT.objects.filter(
        active=True, mturk_only=False, completed=False)
    for pending in pending_hits:
        if pending.users.count() < 1:
            continue

        completed_count += 1
        pending.completed = True
        pending.save()

    # Remaining HITs across all language pairs.
    remaining_count = HIT.compute_remaining_hits()

    # Results contributed so far, restricted to completed non-MTurk HITs.
    result_count = RankingResult.objects.filter(
        item__hit__completed=True, item__hit__mturk_only=False).count()

    # Participating groups: every group of a campaign user, except the
    # campaign group itself and the language pair groups (eng2*/*2eng).
    participating = set()
    for member in campaign_users:
        for membership in member.groups.all():
            label = membership.name
            is_excluded = (label == 'WMT14' or label.startswith('eng2')
                           or label.endswith('2eng'))
            if not is_excluded:
                participating.add(membership)

    # Total and average annotation time, taken over *all* ranking results.
    all_durations = RankingResult.objects.all().values_list(
        'duration', flat=True)
    total_seconds = sum(datetime_to_seconds(item) for item in all_durations)

    # `or 1` guards against division by zero while nothing is completed.
    per_hit_seconds = total_seconds / float(completed_count or 1)
    # NOTE(review): the factor 3 presumably dates from the former
    # three-annotators-per-HIT rule -- confirm it is still intended.
    per_user_seconds = total_seconds / float(3 * completed_count or 1)

    return [
        ('Users', campaign_users.count()),
        ('Groups', len(participating)),
        ('HITs completed', completed_count),
        ('HITs remaining', remaining_count),
        ('Ranking results', result_count),
        # Each ranking result is reported as 10 system comparisons.
        ('System comparisons', 10 * result_count),
        ('Average duration', seconds_to_timedelta(per_hit_seconds)),
        ('Average duration (single user)',
         seconds_to_timedelta(per_user_seconds)),
        ('Total duration', seconds_to_timedelta(total_seconds)),
    ]
def _compute_global_stats():
    """
    Collects campaign-wide figures for the WMT14 evaluation campaign.

    Returns a list of (label, value) tuples in display order.

    Side effect: every active, non-MTurk HIT that has at least one user
    attached but is not yet flagged as completed is flagged and saved.

    """
    campaign_group = Group.objects.get(name='WMT14')
    campaign_users = campaign_group.user_set.all()

    # Start from the HITs that are already flagged as completed.
    completed_count = HIT.objects.filter(
        mturk_only=False, completed=True).count()

    # A HIT counts as completed once one or more annotators have worked on
    # it (previously three were required for greater overlap).  Flag the
    # active HITs that satisfy this but are not marked complete yet.
    pending_hits = HIT.objects.filter(
        active=True, mturk_only=False, completed=False)
    for pending in pending_hits:
        if pending.users.count() < 1:
            continue

        completed_count += 1
        pending.completed = True
        pending.save()

    # Remaining HITs across all language pairs.
    remaining_count = HIT.compute_remaining_hits()

    # Results contributed so far, restricted to completed non-MTurk HITs.
    result_count = RankingResult.objects.filter(
        item__hit__completed=True, item__hit__mturk_only=False).count()

    # Participating groups: every group of a campaign user, except the
    # campaign group itself and the language pair groups (eng2*/*2eng).
    participating = set()
    for member in campaign_users:
        for membership in member.groups.all():
            label = membership.name
            is_excluded = (label == 'WMT14' or label.startswith('eng2')
                           or label.endswith('2eng'))
            if not is_excluded:
                participating.add(membership)

    # Total and average annotation time, taken over *all* ranking results.
    all_durations = RankingResult.objects.all().values_list(
        'duration', flat=True)
    total_seconds = sum(datetime_to_seconds(item) for item in all_durations)

    # `or 1` guards against division by zero while nothing is completed.
    per_hit_seconds = total_seconds / float(completed_count or 1)
    # NOTE(review): the factor 3 presumably dates from the former
    # three-annotators-per-HIT rule -- confirm it is still intended.
    per_user_seconds = total_seconds / float(3 * completed_count or 1)

    return [
        ('Users', campaign_users.count()),
        ('Groups', len(participating)),
        ('HITs completed', completed_count),
        ('HITs remaining', remaining_count),
        ('Ranking results', result_count),
        # Each ranking result is reported as 10 system comparisons.
        ('System comparisons', 10 * result_count),
        ('Average duration', seconds_to_timedelta(per_hit_seconds)),
        ('Average duration (single user)',
         seconds_to_timedelta(per_user_seconds)),
        ('Total duration', seconds_to_timedelta(total_seconds)),
    ]
def _compute_language_pair_stats():
    """
    Builds the HIT overview for every entry in LANGUAGE_PAIR_CHOICES.

    Returns a list with one tuple per language pair, shaped as
    (name, (remaining, remaining %), (completed, completed %)), where the
    percentages are relative to remaining + completed HITs of that pair.

    """
    stats = []

    for entry in LANGUAGE_PAIR_CHOICES:
        pair_code = entry[0]
        pair_name = entry[1]

        # Query the remaining HITs first: compute_remaining_hits() is said
        # to also update the completion status of HITs, which the completed
        # count below should reflect.
        remaining = HIT.compute_remaining_hits(language_pair=pair_code)
        completed = HIT.objects.filter(
            completed=True, mturk_only=False,
            language_pair=pair_code).count()

        # `or 1` avoids a division by zero for pairs without any HITs.
        denominator = float((remaining + completed) or 1)

        stats.append((
            pair_name,
            (remaining, 100 * remaining / denominator),
            (completed, 100 * completed / denominator),
        ))

    return stats
def _compute_language_pair_stats():
    """
    Builds the HIT overview for every entry in LANGUAGE_PAIR_CHOICES.

    Returns a list with one tuple per language pair, shaped as
    (name, (remaining, remaining %), (completed, completed %)), where the
    percentages are relative to remaining + completed HITs of that pair.

    """
    stats = []

    for entry in LANGUAGE_PAIR_CHOICES:
        pair_code = entry[0]
        pair_name = entry[1]

        # Query the remaining HITs first: compute_remaining_hits() is said
        # to also update the completion status of HITs, which the completed
        # count below should reflect.
        remaining = HIT.compute_remaining_hits(language_pair=pair_code)
        completed = HIT.objects.filter(
            completed=True, mturk_only=False,
            language_pair=pair_code).count()

        # `or 1` avoids a division by zero for pairs without any HITs.
        denominator = float((remaining + completed) or 1)

        stats.append((
            pair_name,
            (remaining, 100 * remaining / denominator),
            (completed, 100 * completed / denominator),
        ))

    return stats
usage: export_wmt14_status.py Exports HIT status for all language pairs. """ from datetime import datetime import os import sys if __name__ == "__main__": # Properly set DJANGO_SETTINGS_MODULE environment variable. os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' PROJECT_HOME = os.path.normpath(os.getcwd() + "/..") sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from appraise.wmt14.models import HIT, LANGUAGE_PAIR_CHOICES remaining_hits = {} for language_pair in [x[0] for x in LANGUAGE_PAIR_CHOICES]: remaining_hits[language_pair] = HIT.compute_remaining_hits( language_pair=language_pair) print print '[{0}]'.format(datetime.now().strftime("%c")) for k, v in remaining_hits.items(): print '{0}: {1:03d}'.format(k, v) print
Author: Christian Federmann <*****@*****.**> usage: export_wmt14_status.py Exports HIT status for all language pairs. """ from datetime import datetime import os import sys if __name__ == "__main__": # Properly set DJANGO_SETTINGS_MODULE environment variable. os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' PROJECT_HOME = os.path.normpath(os.getcwd() + "/..") sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from appraise.wmt14.models import HIT, LANGUAGE_PAIR_CHOICES remaining_hits = {} for language_pair in [x[0] for x in LANGUAGE_PAIR_CHOICES]: remaining_hits[language_pair] = HIT.compute_remaining_hits( language_pair=language_pair) print print '[{0}]'.format(datetime.now().strftime("%c")) for k, v in remaining_hits.items(): print '{0}: {1:03d}'.format(k, v) print