コード例 #1
0
def _compute_user_stats():
    """
    Computes user statistics for the WMT15 evaluation campaign.

    Returns a list of (username, completed HITs, average time, total time)
    tuples, one for every member of the 'WMT15' group who has completed at
    least one HIT.  The list is sorted by the number of completed HITs in
    descending order.  An empty list is returned if the group does not exist.
    """
    user_stats = []

    wmt15_group = Group.objects.filter(name='WMT15')
    wmt15_users = []
    if wmt15_group.exists():
        wmt15_users = wmt15_group[0].user_set.all()

    for user in wmt15_users:
        _user_stats = HIT.compute_status_for_user(user)
        _completed = _user_stats[0]

        # Skip users without any completed HITs.  The check has to look at
        # the HIT count;  comparing the username against 0 is always true
        # on Python 2 and raises a TypeError on Python 3.
        if not _completed > 0:
            continue

        _avg_time = seconds_to_timedelta(_user_stats[1])
        _total_time = seconds_to_timedelta(_user_stats[2])
        user_stats.append((user.username, _completed, _avg_time, _total_time))

    # Sort by total number of completed HITs, most active users first.
    user_stats.sort(key=lambda x: x[1], reverse=True)

    return user_stats
コード例 #2
0
def _compute_language_pair_stats():
    """
    Computes HIT statistics per language pair.

    Returns a list with one tuple per entry of LANGUAGE_PAIR_CHOICES:
    (name, number of unique systems, (remaining HITs, percentage),
    (completed HITs, percentage)).
    """
    stats = []

    # TODO: move LANGUAGE_PAIR_CHOICES to a better place.
    for choice in LANGUAGE_PAIR_CHOICES:
        code, name = choice[0], choice[1]

        # Running compute_remaining_hits() will also update the completion
        # status for HITs, so it is called before querying completed HITs.
        remaining = HIT.compute_remaining_hits(language_pair=code)
        completed_hits = HIT.objects.filter(completed=True, mturk_only=False,
          language_pair=code)

        # Collect the distinct system names found in the translations of
        # all ranking results belonging to completed HITs.  A translation
        # may list several comma-separated systems.
        systems = set()
        for hit in completed_hits:
            for result in RankingResult.objects.filter(item__hit=hit):
                for translation in result.item.translations:
                    systems.update(translation[1]['system'].split(','))

        completed = completed_hits.count()

        # Fall back to 1 to avoid a division by zero for empty pairs.
        denominator = float((remaining + completed) or 1)

        stats.append((
          name,
          len(systems),
          (remaining, 100 * remaining / denominator),
          (completed, 100 * completed / denominator),
        ))

    return stats
コード例 #3
0
 # Point Django at the settings module.  This has to happen before the
 # Django imports below are executed.
 os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
 # PROJECT_HOME is the parent of the current working directory, so the
 # script presumably has to be started from a direct sub-folder of the
 # project -- TODO confirm.
 PROJECT_HOME = os.path.normpath(os.getcwd() + "/..")
 sys.path.append(PROJECT_HOME)
 
 # PROJECT_HOME is on the system path list now, hence the appraise import
 # below can be resolved.
 from django.contrib.auth.models import User, Group
 from appraise.wmt15.models import HIT
 
 # Compute user statistics for all members of the WMT15 group.  Note that
 # Group.objects.get() raises Group.DoesNotExist if the group is missing.
 user_stats = []
 wmt15 = Group.objects.get(name='WMT15')
 users = wmt15.user_set.all()
 
 for user in users:
     _user_stats = HIT.compute_status_for_user(user)
     _name = user.username
     _email = user.email
     
     # Pick the first group which is neither the WMT15 group itself nor a
     # language pair group ("eng2..." or "...2eng");  users without such
     # a group are reported as "UNDEFINED".
     _group = "UNDEFINED"
     for _g in user.groups.all():
         if _g.name.startswith("eng2") \
           or _g.name.endswith("2eng") \
           or _g.name == "WMT15":
             continue
         
         _group = _g.name
         break
     
     # One row per user:  (username, email, group name, first and third
     # element of the user status).
     _data = (_name, _email, _group, _user_stats[0], _user_stats[2])
     user_stats.append(_data)
コード例 #4
0
            # Hotfix potentially wrong ISO codes;  we are using ISO-639-3,
            # so the ISO-639-2 codes listed below are rewritten.
            #
            # NOTE(review): str.replace() matches anywhere inside of
            # language_pair, not only complete codes -- confirm that no
            # valid language pair contains one of these substrings.
            iso_639_2_to_3_mapping = {'cze': 'ces', 'fre': 'fra', 'ger': 'deu'}
            for part2_code, part3_code in iso_639_2_to_3_mapping.items():
                language_pair = language_pair.replace(part2_code, part3_code)
        
            try:
                # NOTE(review): _total is incremented before the HIT has
                # been serialised and created, so it counts attempted
                # imports, including those which end up in _errors.
                _total = _total + 1
                # Serialise the XML element and decode it into text.
                _hit_xml = tostring(_child, encoding="utf-8").decode('utf-8')
            
                if args.dry_run_enabled:
                    # Dry run:  only construct the HIT instance, it is
                    # never saved.
                    _ = HIT(block_id=block_id, hit_xml=_hit_xml,
                      language_pair=language_pair, mturk_only=args.mturk_only)
            
                else:
                    # get_or_create() would avoid exact duplicates.  We do
                    # allow them for WMT15 to measure intra-annotator
                    # agreement, hence every HIT is created and saved
                    # unconditionally.
                    h = HIT(block_id=block_id, hit_xml=_hit_xml,
                      language_pair=language_pair, mturk_only=args.mturk_only)
                    h.save()
        
            # Any error is printed and counted so that the import can
            # continue with the next HIT.
            # pylint: disable-msg=W0703
            except Exception, msg:
                print msg
                _errors = _errors + 1
    
        # Print a summary for the current HITs file.
        print
        print '[{0}]'.format(_hits_file)
        print 'Successfully imported {0} HITs, encountered errors for ' \
          '{1} HITs.'.format(_total, _errors)
        print
コード例 #5
0
def _compute_group_stats():
    """
    Computes group statistics for the WMT15 evaluation campaign.

    Returns a list of (group name, (total, required, delta)) tuples for all
    known groups with at least one HIT, sorted by ascending delta, where
    delta is total minus required.  A final ("Totals", ...) entry holding
    the sums over all listed groups is appended at the bottom.
    """
    # TODO: move this to property of evaluation group or add dedicated data
    # model.  GOAL: should be configurable from within the Django admin
    # backend.  MINIMAL: move to local_settings.py?
    #
    # Number of HITs each group should have completed during the WMT15
    # evaluation campaign.
    hit_quota = {
      # volunteers
      'MSR': 0,
      'JHU': 0,
      'MTMA': 0,
      # participants, confirmed
      'Abu-MaTran': 600,
      'CIMS': 200,
      'GU': 100,
      'HKUST': 100,
      'IMS': 100,
      'MITLL-AFRL': 300,
      'PROMT': 400,
      'UB': 100,
      'UEDIN': 1700,
      'UFAL': 1600,
      'UIUC': 400,
      'UM': 200,
      'UMontreal': 500,
      'UPF': 100,
      'UU': 200,
      'YSDA': 100,
      # participants, inactive
      'Aalto': 100,
      'DFKI': 200,
      'KIT': 200,
      'LIMSI': 600,
      'USAAR': 500,
      'UoS': 200,
      'RWTH': 100,
      'CMU': 100,
    }

    members = []
    campaign_group = Group.objects.filter(name='WMT15')
    if campaign_group.exists():
        members = campaign_group[0].user_set.all()

    # Participating groups are all groups of the campaign members, except
    # for the WMT15 group itself and the language pair groups.
    groups = set(
      candidate
      for member in members
      for candidate in member.groups.all()
      if candidate.name != 'WMT15'
      and not candidate.name.startswith('eng2')
      and not candidate.name.endswith('2eng')
    )

    rows = []
    for group in groups:
        name = group.name
        if name not in hit_quota:
            continue

        total = HIT.compute_status_for_group(group)[0]
        required = hit_quota[name]
        delta = total - required

        # Groups which have not contributed anything yet are not listed.
        if total > 0:
            rows.append((name, (total, required, delta)))

    # Sort by delta, i.e., groups furthest behind their requirement first.
    rows = sorted(rows, key=lambda row: row[1][2])

    # Add totals at the bottom.
    sum_total = sum(row[1][0] for row in rows)
    sum_required = sum(row[1][1] for row in rows)
    rows.append(("Totals", (sum_total, sum_required, sum_total - sum_required)))

    return rows
コード例 #6
0
def _compute_global_stats():
    """
    Computes some global statistics for the WMT15 evaluation campaign.

    Returns a list of (label, value) tuples covering the number of users and
    groups, completed and remaining HITs, ranking results, pairwise system
    comparisons and average/total durations.

    Side effect: active, non-MTurk HITs which have been annotated by at
    least one user but are not yet flagged as completed are marked as
    completed and saved.
    """
    global_stats = []

    wmt15_group = Group.objects.filter(name='WMT15')
    wmt15_users = []
    if wmt15_group.exists():
        wmt15_users = wmt15_group[0].user_set.all()

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False, completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True, mturk_only=False, completed=False):
        if hit.users.count() >= 1:
            hits_completed = hits_completed + 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(
      item__hit__completed=True, item__hit__mturk_only=False)

    # A result ranking n systems yields n * (n - 1) / 2 pairwise system
    # comparisons.  Results with exactly two systems contribute one
    # comparison;  floor division keeps the counter an integer so that it
    # can be rendered using the '{0:,}' format below.
    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        _systems = result.systems
        if _systems >= 2:
            system_comparisons = system_comparisons \
              + _systems * (_systems - 1) // 2

    # Aggregate information about participating groups, ignoring the WMT15
    # group itself and the language pair groups.
    groups = set()
    for user in wmt15_users:
        for group in user.groups.all():
            if group.name == 'WMT15' or group.name.startswith('eng2') \
              or group.name.endswith('2eng'):
                continue

            groups.add(group)

    # Compute average/total duration over all results.  The `or 1` guards
    # avoid a division by zero while no HIT has been completed.
    #
    # NOTE(review): the factor 3 for the per-task average appears to date
    # back to the former `hit.users.count() >= 3` rule mentioned above --
    # confirm that it is still intended.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    avg_time = total_time / float(hits_completed or 1)
    avg_user_time = total_time / float(3 * hits_completed or 1)

    global_stats.append(('Users', len(wmt15_users)))
    global_stats.append(('Groups', len(groups)))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(ranking_results.count())))
    global_stats.append(('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', seconds_to_timedelta(avg_time)))
    global_stats.append(('Average duration (per task)', seconds_to_timedelta(avg_user_time)))
    global_stats.append(('Total duration', seconds_to_timedelta(total_time)))

    return global_stats
コード例 #7
0
def overview(request):
    """
    Renders the evaluation tasks overview.

    For every language pair group of the current user, the next task and
    the user status are computed.  Together with the totals over all
    language pairs and the status of the user's own group, this data is
    rendered into the 'wmt15/overview.html' template.
    """
    current_user = request.user
    LOGGER.info('Rendering WMT15 HIT overview for user "{0}".'.format(
      current_user.username or "Anonymous"))

    # Re-initialise random number generator.
    seed(None)

    # Language pairs available to the current user are those groups whose
    # name is a known language pair code.
    language_codes = set(choice[0] for choice in LANGUAGE_PAIR_CHOICES)
    language_pairs = current_user.groups.filter(name__in=language_codes)

    hit_data = []
    total = [0, 0, 0]
    for language_pair in language_pairs:
        hit = _compute_next_task_for_user(current_user, language_pair)
        user_status = HIT.compute_status_for_user(current_user, language_pair)

        # Totals are accumulated from the raw status values, i.e., before
        # any of them is converted below.
        for idx in range(3):
            total[idx] += user_status[idx]

        if not hit:
            continue

        # Convert status seconds into timedelta instances.
        for idx in (1, 2):
            user_status[idx] = seconds_to_timedelta(int(user_status[idx]))

        hit_data.append((
          hit.get_language_pair_display(),
          hit.get_absolute_url(),
          hit.hit_id,
          user_status,
        ))

    # Derive the overall average from the totals;  microseconds are removed
    # to get a nicer timedelta rendering in templates.
    total_seconds = int(total[2])
    average = seconds_to_timedelta(
      total_seconds / float(int(total[0]) or 1))
    total[1] = average - timedelta(microseconds=average.microseconds)
    total[2] = seconds_to_timedelta(total_seconds)

    # The user's own group is the first group which is neither the WMT15
    # group nor a language pair group.
    group = next(
      (candidate for candidate in current_user.groups.all()
       if not (candidate.name == 'WMT15'
               or candidate.name.startswith('eng2')
               or candidate.name.endswith('2eng'))),
      None)

    if group is None:
        group_name = None
        group_status = None

    else:
        group_name = group.name
        group_status = HIT.compute_status_for_group(group)
        for idx in (1, 2):
            group_status[idx] = seconds_to_timedelta(int(group_status[idx]))

    hit_lines = u'\n'.join(
      u'{0}\t{1}\t{2}\t{3}'.format(*entry) for entry in hit_data)
    LOGGER.debug(u'\n\nHIT data for user "{0}":\n\n{1}\n'.format(
      current_user.username or "Anonymous", hit_lines))

    # Only super users get a link to the admin backend.
    admin_url = reverse('admin:index') if current_user.is_superuser else None

    dictionary = {
      'active_page': "OVERVIEW",
      'hit_data': hit_data,
      'total': total,
      'group_name': group_name,
      'group_status': group_status,
      'admin_url': admin_url,
      'title': 'WMT15 Dashboard',
    }
    dictionary.update(BASE_CONTEXT)

    LOGGER.info(dictionary.values())

    return render(request, 'wmt15/overview.html', dictionary)