Ejemplo n.º 1
0
def get_ranking_scores(data, predicted_rank_name, original_rank_name,
                       invert_ranks = False,
                       filter_ref = True,
                       suffix = "",
                       prefix = "",
                       replace_predicted = None,
                       **kwargs):
    """Score predicted rankings against original rankings over a dataset.

    For every parallel sentence yielded by ``data.get_parallelsentences()``
    the values of the two rank attributes are read from its targets, wrapped
    in ``Ranking`` objects and collected. Every callback in
    ``SET_METRIC_FUNCTIONS`` is then called with the two collected lists and
    the dicts they return are merged.

    :param data: object exposing ``get_parallelsentences()``
    :param predicted_rank_name: target attribute holding the predicted rank
    :param original_rank_name: target attribute holding the original rank
    :param invert_ranks: if true, the *predicted* ranking is replaced by its
        ``reverse()``; the original ranking is never reversed
    :param filter_ref: if true, values are read through
        ``get_filtered_target_attribute_values`` with the filter
        ``system == "_ref"`` (meant to leave out the reference translations);
        otherwise through ``get_target_attribute_values``
    :param suffix: text appended to every resulting key
    :param prefix: text prepended to every resulting key, followed by ``-``
    :param replace_predicted: accepted for interface compatibility; not used
    :param kwargs: accepted for interface compatibility; not used
    :return: ``OrderedDict`` mapping ``"<prefix>-<metric><suffix>"`` to the
        metric value
    :raises Exception: any error raised while reading the attributes or
        building the rankings is logged and re-raised unchanged, so
        processing stops at the first failing sentence
    """
    predicted_rank_vectors = []
    original_rank_vectors = []

    for parallelsentence in data.get_parallelsentences():

        if filter_ref:
            #get a vector with all the rank labels from all systems apart from the references
            try:
                predicted_rank_vector = parallelsentence.get_filtered_target_attribute_values(
                    predicted_rank_name,
                    filter_attribute_name="system",
                    filter_attribute_value="_ref")
            except Exception:
                logging.error("Could not get desired rank_name '{}' from parallelsentence with attributes: \n{}".format(predicted_rank_name, parallelsentence.get_attributes()))
                #bare raise keeps the original exception type and traceback
                raise

            original_rank_vector = parallelsentence.get_filtered_target_attribute_values(
                original_rank_name,
                filter_attribute_name="system",
                filter_attribute_value="_ref")
        else:
            #get a vector with all the rank labels
            predicted_rank_vector = parallelsentence.get_target_attribute_values(predicted_rank_name)
            original_rank_vector = parallelsentence.get_target_attribute_values(original_rank_name)

        #construct ranking objects
        try:
            predicted_ranking = Ranking(predicted_rank_vector)
            original_ranking = Ranking(original_rank_vector)
            #invert the predicted ranking if requested
            if invert_ranks:
                predicted_ranking = predicted_ranking.reverse()
        except Exception:
            logging.error("Error while processing Parallelsentence with attributes {}".format(parallelsentence.get_attributes()))
            #log the raw vectors: the Ranking objects may not exist yet (or may
            #still belong to the previous sentence) when construction fails
            logging.error("ranking that caused the error: predicted: {}, original: {}".format(predicted_rank_vector, original_rank_vector))
            raise

        #add the rankings to the lists covering all previous parallel sentences
        predicted_rank_vectors.append(predicted_ranking)
        original_rank_vectors.append(original_ranking)

    #process the list of rankings with all metric functions and collect the
    #results in an ordered dict
    stats = OrderedDict()
    for callback in SET_METRIC_FUNCTIONS:
        stats.update(callback(predicted_rank_vectors, original_rank_vectors))

    #add the requested prefix and suffix to every key; items() works on both
    #Python 2 and Python 3, unlike iteritems()
    return OrderedDict(("{}-{}{}".format(prefix, key, suffix), value)
                       for key, value in stats.items())
Ejemplo n.º 2
0
def get_baseline_ranking_scores(data, baseline_name, original_rank_name,
                       invert_ranks = False,
                       filter_ref = True,
                       suffix = "",
                       prefix = "",
                       **kwargs):
    """Score a synthetic baseline ranking against the original rankings.

    For every parallel sentence yielded by ``data.get_parallelsentences()``
    the original rank values are read and a baseline rank vector of the same
    length is generated. Both are wrapped in ``Ranking`` objects, and every
    callback in ``SET_METRIC_FUNCTIONS`` is called with the two collected
    lists; the dicts they return are merged.

    Supported ``baseline_name`` values:

    * ``"fixed"``: every item gets rank 1
    * ``"random"``: independent random ranks in ``1..n`` (ties possible)
    * ``"random_noties"``: a random permutation of ``1..n``
    * ``"alphabetical"``: rank is the 1-based position of the item's
      ``system`` value in the ascending sorted list of system names
    * ``"alphabetical_inv"``: same, with the list sorted descending

    :param data: object exposing ``get_parallelsentences()``
    :param baseline_name: one of the names listed above
    :param original_rank_name: target attribute holding the original rank
    :param invert_ranks: if true, the *baseline* ranking is replaced by its
        ``reverse()``; the original ranking is never reversed
    :param filter_ref: if true, values are read through
        ``get_filtered_target_attribute_values`` with the filter
        ``system == "_ref"`` (meant to leave out the reference translations);
        otherwise through ``get_target_attribute_values``
    :param suffix: text appended to every resulting key
    :param prefix: text prepended to every resulting key, followed by ``-``
    :param kwargs: accepted for interface compatibility; not used
    :return: ``OrderedDict`` mapping ``"<prefix>-<metric><suffix>"`` to the
        metric value
    :raises Exception: if ``baseline_name`` is not supported (detected when
        the first sentence is processed); errors raised while building the
        rankings are logged and re-raised unchanged
    """
    predicted_rank_vectors = []
    original_rank_vectors = []

    for parallelsentence in data.get_parallelsentences():

        if filter_ref:
            #get a vector with all the rank labels from all systems apart from the references
            original_rank_vector = parallelsentence.get_filtered_target_attribute_values(
                original_rank_name,
                filter_attribute_name="system",
                filter_attribute_value="_ref")
        else:
            original_rank_vector = parallelsentence.get_target_attribute_values(original_rank_name)

        ranking_length = len(original_rank_vector)
        if baseline_name == "fixed":
            predicted_rank_vector = [1] * ranking_length
        elif baseline_name == "random":
            #NOTE: reseeding on every sentence discards any seed set by the caller
            random.seed()
            predicted_rank_vector = [random.randint(1, ranking_length) for _ in range(ranking_length)]
        elif baseline_name == "random_noties":
            random.seed()
            #shuffle needs a mutable sequence; range() is not one on Python 3
            predicted_rank_vector = list(range(1, ranking_length + 1))
            random.shuffle(predicted_rank_vector)
        elif baseline_name in ("alphabetical", "alphabetical_inv"):
            #read the system names through the same path as the original ranks
            #so both vectors cover the same targets
            if filter_ref:
                system_names = parallelsentence.get_filtered_target_attribute_values(
                    "system",
                    filter_attribute_name="system",
                    filter_attribute_value="_ref")
            else:
                system_names = parallelsentence.get_target_attribute_values("system")
            sorted_system_names = sorted(system_names, reverse=(baseline_name == "alphabetical_inv"))
            #index() returns the first match, so equal names share a rank
            predicted_rank_vector = [sorted_system_names.index(name) + 1 for name in system_names]
        else:
            raise Exception("You provided a baseline name that is not supported {}".format(baseline_name))

        #construct ranking objects
        try:
            predicted_ranking = Ranking(predicted_rank_vector)
            original_ranking = Ranking(original_rank_vector)
            #invert the baseline ranking if requested
            if invert_ranks:
                predicted_ranking = predicted_ranking.reverse()
        except Exception:
            logging.error("Error while processing Parallelsentence with attributes {}".format(parallelsentence.get_attributes()))
            #log the raw vectors: the Ranking objects may not exist yet (or may
            #still belong to the previous sentence) when construction fails
            logging.error("ranking that caused the error: predicted: {}, original: {}".format(predicted_rank_vector, original_rank_vector))
            #bare raise keeps the original exception type and traceback
            raise

        #add the rankings to the lists covering all previous parallel sentences
        predicted_rank_vectors.append(predicted_ranking)
        original_rank_vectors.append(original_ranking)

    #process the list of rankings with all metric functions and collect the
    #results in an ordered dict
    stats = OrderedDict()
    for callback in SET_METRIC_FUNCTIONS:
        stats.update(callback(predicted_rank_vectors, original_rank_vectors))

    #add the requested prefix and suffix to every key; items() works on both
    #Python 2 and Python 3, unlike iteritems()
    return OrderedDict(("{}-{}{}".format(prefix, key, suffix), value)
                       for key, value in stats.items())