def get_ranking_scores(data, predicted_rank_name, original_rank_name,
                       invert_ranks=False, filter_ref=True, suffix="",
                       prefix="", replace_predicted=None, **kwargs):
    """
    Compare the predicted rankings of a dataset with its original rankings
    and collect the results of every function in SET_METRIC_FUNCTIONS.

    :param data: object providing get_parallelsentences()
    :param predicted_rank_name: name of the target attribute that holds the
        predicted rank
    :param original_rank_name: name of the target attribute that holds the
        original rank
    :param invert_ranks: if True, the predicted ranking (and only the
        predicted one) is reversed before scoring
    :param filter_ref: if True, the rank values are fetched through
        get_filtered_target_attribute_values with system="_ref" as filter;
        otherwise through get_target_attribute_values
    :param suffix: string appended to every key of the result
    :param prefix: string prepended to every key of the result; note that it
        is always followed by "-", even when it is empty
    :param replace_predicted: accepted for interface compatibility; not used
        by this function
    :param kwargs: ignored
    :return: OrderedDict mapping "<prefix>-<metric key><suffix>" to the value
        reported by the metric functions
    :raise Exception: whatever the attribute getters or the Ranking
        constructor raise; the error is logged and re-raised unchanged
    """
    predicted_rankings = []
    original_rankings = []

    for parallelsentence in data.get_parallelsentences():
        if filter_ref:
            # rank labels of all systems apart from the references
            try:
                predicted_rank_vector = parallelsentence.get_filtered_target_attribute_values(
                    predicted_rank_name,
                    filter_attribute_name="system",
                    filter_attribute_value="_ref")
            except Exception:
                logging.error("Could not get desired rank_name '{}' from parallelsentence with attributes: \n{}".format(predicted_rank_name, parallelsentence.get_attributes()))
                # bare raise keeps the original exception type and traceback
                raise
            original_rank_vector = parallelsentence.get_filtered_target_attribute_values(
                original_rank_name,
                filter_attribute_name="system",
                filter_attribute_value="_ref")
        else:
            # rank labels of all targets
            predicted_rank_vector = parallelsentence.get_target_attribute_values(predicted_rank_name)
            original_rank_vector = parallelsentence.get_target_attribute_values(original_rank_name)

        # construct ranking objects
        try:
            predicted_ranking = Ranking(predicted_rank_vector)
            original_ranking = Ranking(original_rank_vector)
            # only the predicted ranking is inverted; the original stays as is
            if invert_ranks:
                predicted_ranking = predicted_ranking.reverse()
        except Exception:
            logging.error("Error while processing Parallelsentence with attributes {}".format(parallelsentence.get_attributes()))
            # Log the raw vectors: the Ranking objects may not exist yet (or
            # may still belong to the previous sentence) when we get here.
            logging.error("ranking that caused the error: predicted: {}, original: {}".format(predicted_rank_vector, original_rank_vector))
            raise

        # add the rankings to the lists covering all previous sentences
        predicted_rankings.append(predicted_ranking)
        original_rankings.append(original_ranking)

    # process the list of rankings with all metric functions and collect the
    # results in an ordered dict
    stats = OrderedDict()
    for callback in SET_METRIC_FUNCTIONS:
        stats.update(callback(predicted_rankings, original_rankings))

    # add the requested prefix and suffix to every key
    # (.items() instead of .iteritems() so this runs on Python 2 and 3)
    return OrderedDict(
        ("{}-{}{}".format(prefix, key, suffix), value)
        for key, value in stats.items()
    )
def get_baseline_ranking_scores(data, baseline_name, original_rank_name,
                                invert_ranks=False, filter_ref=True,
                                suffix="", prefix="", **kwargs):
    """
    Score an artificial baseline ranking against the original rankings of a
    dataset, using every function in SET_METRIC_FUNCTIONS.

    Supported values of baseline_name:
      - "fixed": every item gets rank 1
      - "random": every item gets a random rank in [1, n], ties possible
      - "random_noties": a random permutation of 1..n
      - "alphabetical": rank by position of the system name in sorted order
      - "alphabetical_inv": same, with reverse sorted order
    where n is the length of the original rank vector of the sentence.

    :param data: object providing get_parallelsentences()
    :param baseline_name: one of the baseline names listed above
    :param original_rank_name: name of the target attribute that holds the
        original rank
    :param invert_ranks: if True, the baseline ranking (and only that one)
        is reversed before scoring
    :param filter_ref: if True, values are fetched through
        get_filtered_target_attribute_values with system="_ref" as filter;
        otherwise through get_target_attribute_values
    :param suffix: string appended to every key of the result
    :param prefix: string prepended to every key of the result; note that it
        is always followed by "-", even when it is empty
    :param kwargs: ignored
    :return: OrderedDict mapping "<prefix>-<metric key><suffix>" to the value
        reported by the metric functions
    :raise Exception: if baseline_name is not supported (raised when the
        first sentence is processed), or whatever the Ranking constructor
        raises; the latter is logged and re-raised unchanged
    """
    predicted_rankings = []
    original_rankings = []

    # Reseed from the system source once per call instead of once per
    # sentence; the random baselines stay non-deterministic either way.
    if baseline_name in ("random", "random_noties"):
        random.seed()

    for parallelsentence in data.get_parallelsentences():
        if filter_ref:
            # rank labels of all systems apart from the references
            original_rank_vector = parallelsentence.get_filtered_target_attribute_values(
                original_rank_name,
                filter_attribute_name="system",
                filter_attribute_value="_ref")
        else:
            original_rank_vector = parallelsentence.get_target_attribute_values(original_rank_name)

        ranking_length = len(original_rank_vector)

        if baseline_name == "fixed":
            predicted_rank_vector = [1] * ranking_length
        elif baseline_name == "random":
            predicted_rank_vector = [random.randint(1, ranking_length)
                                     for _ in range(ranking_length)]
        elif baseline_name == "random_noties":
            # list(...) because random.shuffle needs a mutable sequence and
            # range() is not a list on Python 3
            predicted_rank_vector = list(range(1, ranking_length + 1))
            random.shuffle(predicted_rank_vector)
        elif baseline_name in ("alphabetical", "alphabetical_inv"):
            # Fetch the system names the same way the original ranks were
            # fetched, so both vectors cover the same targets in the same
            # order (previously the names were always reference-filtered,
            # even with filter_ref=False).
            if filter_ref:
                system_names = parallelsentence.get_filtered_target_attribute_values(
                    "system",
                    filter_attribute_name="system",
                    filter_attribute_value="_ref")
            else:
                system_names = parallelsentence.get_target_attribute_values("system")
            sorted_system_names = sorted(
                system_names, reverse=(baseline_name == "alphabetical_inv"))
            # index() returns the first match, so equal names share a rank
            predicted_rank_vector = [sorted_system_names.index(name) + 1
                                     for name in system_names]
        else:
            raise Exception("You provided a baseline name that is not supported {}".format(baseline_name))

        # construct ranking objects
        try:
            predicted_ranking = Ranking(predicted_rank_vector)
            original_ranking = Ranking(original_rank_vector)
            # only the baseline ranking is inverted; the original stays as is
            if invert_ranks:
                predicted_ranking = predicted_ranking.reverse()
        except Exception:
            logging.error("Error while processing Parallelsentence with attributes {}".format(parallelsentence.get_attributes()))
            # Log the raw vectors: the Ranking objects may not exist yet (or
            # may still belong to the previous sentence) when we get here.
            logging.error("ranking that caused the error: predicted: {}, original: {}".format(predicted_rank_vector, original_rank_vector))
            # bare raise keeps the original exception type and traceback
            raise

        # add the rankings to the lists covering all previous sentences
        predicted_rankings.append(predicted_ranking)
        original_rankings.append(original_ranking)

    # process the list of rankings with all metric functions and collect the
    # results in an ordered dict
    stats = OrderedDict()
    for callback in SET_METRIC_FUNCTIONS:
        stats.update(callback(predicted_rankings, original_rankings))

    # add the requested prefix and suffix to every key
    # (.items() instead of .iteritems() so this runs on Python 2 and 3)
    return OrderedDict(
        ("{}-{}{}".format(prefix, key, suffix), value)
        for key, value in stats.items()
    )