コード例 #1
0
ファイル: base_result.py プロジェクト: DaniFdezAlvarez/wMERA
    def __init__(self, uri, entity, query):
        """
        Represents the scores obtained after comparing the entity "entity"
        with the URI "uri" in the graph againt the query "query", storing
        all the forms compared and subscores

        :param uri: uri of the entity in the graph
        :param entity: model object used
        :param query: text to compare with the forms of the model object
        :return:
        """
        self._entity = entity
        self._uri = uri
        self._query = query
        self._forms = {}
        self._refinements = {}
        self._comparator = StringComparator([Levenstein, WeightedDamerauLevenshtein])
コード例 #2
0
ファイル: base_result.py プロジェクト: DaniFdezAlvarez/wMERA
class MeraBaseResult(object):
    def __init__(self, uri, entity, query):
        """
        Represents the scores obtained after comparing the entity "entity"
        with the URI "uri" in the graph againt the query "query", storing
        all the forms compared and subscores

        :param uri: uri of the entity in the graph
        :param entity: model object used
        :param query: text to compare with the forms of the model object
        :return:
        """
        self._entity = entity
        self._uri = uri
        self._query = query
        self._forms = {}
        self._refinements = {}
        self._comparator = StringComparator([Levenstein, WeightedDamerauLevenshtein])

    @property
    def uri(self):
        return self._uri

    @property
    def entity(self):
        return self._entity

    @property
    def query(self):
        return self._query


    def __cmp__(self, other):
        """
        Criteria:
        It will be considered greater the entity with bigger get_max_score punctuation.

        In case of draw, it will be considered greater the one a higher score in any of
        its primary forms (without refinements).

        In case of draw again, the object with the highest two scores. And so on.

        If we reach the len with a draw in all its elements, it will be considered
        higher the one with the sorter list
        Examples when we obtain the same score in get_max_score(result sorting
        entities with the greater first):

        With this scores: [1,0.43, 0.2]  , [0.99, 0.89, 0.86], we sort like this:

        1. [1,0.43, 0.2]
        2. [0.99, 0.89, 0.86]

        With this scores: [1, 0.90, 0.0]   ,  [1, 0.89, 0.43, 0.42] , we sort like this
         1. [1, 0.90, 0.0]
         2. [1, 0.89, 0.43, 0.42]

        With this scores: [1, 0.89, 0.43]  , [1, 0.89, 0.43, 0.42]
        we sort like this:

        1. [1, 0.89, 0.43]
        2. [1, 0.89, 0.43, 0.42]


        :param other:
        :return:
        """
        if not isinstance(other, MeraBaseResult):
            raise NotImplementedError("Unsupported Comparison")

        self_score = self.get_max_score()
        other_score = other.get_max_score()
        if self_score > other_score:
            return 1
        elif self_score < other_score:
            return -1

        else:
            self_base_scores = self._forms.values()
            self_base_scores.sort(reverse=True)
            other_base_scores = other._forms.values()
            other_base_scores.sort(reverse=True)

            index = 0

            while index < len(self_base_scores) and index < len(other_base_scores):
                if self_base_scores[index] < other_base_scores[index]:
                    return -1
                if self_base_scores[index] < other_base_scores[index]:
                    return 1
                index += 1

            if len(self_base_scores) < len(other_base_scores):
                return 1
            elif len(self_base_scores) > len(other_base_scores):
                return -1

        return 0


    def __str__(self):
        return "{\nCANONICAL: " + self._entity.canonical + "\nFORMS: [" + str(self._forms) + \
               "]\nMAX SCORE WITH REFS: " + str(self.get_max_score()) + "\n"


    def get_refinements(self):
        """

        :return:
        """
        result = []
        for ref_type in self._refinements:
            for a_query in self._refinements[ref_type][QUERIES_REFINEMENT]:
                tmp_dict = {}
                tmp_dict['type'] = ref_type
                tmp_dict['relevance'] = self._refinements[ref_type][RELEVANCE_REFINEMENT]
                tmp_dict['content'] = a_query
                tmp_dict['matched_forms'] = {}
                target_forms = self._refinements[ref_type][QUERIES_REFINEMENT][a_query]
                for a_form in target_forms:
                    tmp_dict['matched_forms'][a_form] = target_forms[a_form]
                result.append(tmp_dict)
        return result

    def add_refinement(self, refinement_type, relevance, minimum_acceptable, query):
        """
        Add a refinement type to the result object. The type provided (through a string) should be included
        in a known range.

        With relevant we are specifying how important is obtaining high coincidences comparing the forms
        (0 the less, 1 the greatest).

        minimum_acceptable specified the minimun score that should be obtained when comparings
        in order to not discard the result (not mattering the relevance factor).

        Query is the string query against we will comparing the forms


        :param refinement_type: string inside a known range of types and used only once with this method
        :param relevance: float between 0 and 1
        :param query: str containing the query against which we are refining
        :param minimum_acceptable: float between 0 and 1
        :return:
        """
        # TODO: revise this doc

        if refinement_type not in self._refinements:
            self._refinements[refinement_type] = {}
            self._refinements[refinement_type][QUERIES_REFINEMENT] = {}
            self._refinements[refinement_type][MINIMUM_ACCEPTABLE_SCORE_REFINEMENT] = minimum_acceptable
            self._refinements[refinement_type][RELEVANCE_REFINEMENT] = relevance

        self._refinements[refinement_type][QUERIES_REFINEMENT][query] = {}

    def add_form_comparison(self, str_form):
        """
        It expects a string to compare it with self._query.
        It compares the query with the received form and stores the result

        :param str_form:
        :return:
        """
        score = self._comparator.compare_str(self._query, str_form)
        self._forms[str_form] = score

    def get_matched_forms(self):
        """
        Return tuples of (form, score)
        :return:
        """
        for a_form in self._forms:
            yield (a_form, self._forms[a_form])


    def add_refinement_form_comparison(self, str_form, refinement_type, query_refinement):
        """
        it compares str_form with self._refinements[refinement_type][QUERY_REFINEMENT],
        and stores the result in self._refinements[refinement_type][FORMS_REFINEMENT]
        if it is >= than self._refinements[refinement_type][MINIMUM_ACCEPTABLE_SCORE_REFINEMENT]
        :param str_form:
        :param refinement_type:
        :return:
        """
        # TODO: revise this doc
        score = self._comparator.compare_str(query_refinement, str_form)
        if score >= self._refinements[refinement_type][MINIMUM_ACCEPTABLE_SCORE_REFINEMENT]:
            self._refinements[refinement_type][QUERIES_REFINEMENT][query_refinement][str_form] = score


    def get_max_score(self):
        """
        It returns a float obtaining form adding the max value in self._forms and all the max
        values of each self_refinements dicts.
        :return:
        """
        max_result = self.get_max_score_without_refinements()
        for a_ref_type_key in self._refinements:
            a_refinement_type = self._refinements[a_ref_type_key]
            for a_ref_query in a_refinement_type[QUERIES_REFINEMENT]:
                if len(a_refinement_type[QUERIES_REFINEMENT][a_ref_query]) > 0:
                    max_result += a_refinement_type[RELEVANCE_REFINEMENT] * max(
                        a_refinement_type[QUERIES_REFINEMENT][a_ref_query].values())
        return max_result


    def get_max_score_without_refinements(self):
        """
         It returns a float with the max value in self._forms
        :return:
        """
        return max(self._forms.values())