Example #1
0
    def distance(self, query, entity):
        """Return a best-match distance between two weighted mappings.

        Each input is cut down to its 100 highest-valued items and turned
        into word representations with ``self.iter_word_reps``.  Every
        representation on the side whose input had fewer items is paired
        with its most similar representation on the other side; the result
        is one minus the mean of those best-match similarities.

        Args:
            query: Mapping whose values are sortable weights (presumably
                word -> weight; confirm against callers).
            entity: Mapping of the same kind as ``query``.

        Returns:
            ``1.0`` when either input is empty or yields no word
            representations, otherwise ``1.0 - mean(best similarity)``.
        """
        q_len = len(query)
        e_len = len(entity)

        if q_len == 0 or e_len == 0:
            return 1.0

        # ``items()`` (unlike ``iteritems()``) exists on Python 2 and 3.
        by_weight = operator.itemgetter(1)
        query = dict(sorted(query.items(), key=by_weight, reverse=True)[:100])
        entity = dict(sorted(entity.items(), key=by_weight, reverse=True)[:100])

        query_wrs = list(self.iter_word_reps(query))
        entity_wrs = list(self.iter_word_reps(entity))

        # ``b`` is the side whose *input* had fewer items (query on ties);
        # the sizes are the ones measured before the top-100 truncation.
        if e_len >= q_len:
            a, b = entity_wrs, query_wrs
        else:
            a, b = query_wrs, entity_wrs

        # Nothing to match against: treat like empty input instead of
        # dividing by zero below.
        if not a or not b:
            return 1.0

        total = 0.0
        for wb in b:
            max_sim = 0.0  # best matches are floored at zero similarity
            for wa in a:
                # Compare the two word representations themselves; the
                # previous code compared the whole ``query``/``entity``
                # dicts on every iteration and ignored ``wa``/``wb``.
                # NOTE(review): assumes dense_cosine_distance returns
                # 1 - cosine similarity and accepts single word
                # representations -- confirm against its definition.
                sim = 1.0 - dense_cosine_distance(wa, wb)
                if sim > max_sim:
                    max_sim = sim
            total += max_sim

        return 1.0 - (total / len(b))
Example #2
0
    def distance(self, query, entity):
        """Return a best-match distance between two weighted mappings.

        Each input is cut down to its 100 highest-valued items and turned
        into word representations with ``self.iter_word_reps``.  Every
        representation on the side whose input had fewer items is paired
        with its most similar representation on the other side; the result
        is one minus the mean of those best-match similarities.

        Args:
            query: Mapping whose values are sortable weights (presumably
                word -> weight; confirm against callers).
            entity: Mapping of the same kind as ``query``.

        Returns:
            ``1.0`` when either input is empty or yields no word
            representations, otherwise ``1.0 - mean(best similarity)``.
        """
        q_len = len(query)
        e_len = len(entity)

        if q_len == 0 or e_len == 0:
            return 1.0

        # ``items()`` (unlike ``iteritems()``) exists on Python 2 and 3.
        query = dict(
            sorted(query.items(),
                   key=operator.itemgetter(1),
                   reverse=True)[:100])
        entity = dict(
            sorted(entity.items(),
                   key=operator.itemgetter(1),
                   reverse=True)[:100])

        query_wrs = list(self.iter_word_reps(query))
        entity_wrs = list(self.iter_word_reps(entity))

        # ``b`` is the side whose *input* had fewer items (query on ties);
        # the sizes are the ones measured before the top-100 truncation.
        a = entity_wrs if e_len >= q_len else query_wrs
        b = entity_wrs if q_len > e_len else query_wrs

        # Nothing to match against: treat like empty input instead of
        # dividing by zero below.
        if not a or not b:
            return 1.0

        total = 0.0
        for wb in b:
            max_sim = 0.0  # best matches are floored at zero similarity
            for wa in a:
                # Compare the two word representations themselves; the
                # previous code compared the whole ``query``/``entity``
                # dicts on every iteration and ignored ``wa``/``wb``.
                # NOTE(review): assumes dense_cosine_distance returns
                # 1 - cosine similarity and accepts single word
                # representations -- confirm against its definition.
                sim = 1.0 - dense_cosine_distance(wa, wb)
                if sim > max_sim:
                    max_sim = sim
            total += max_sim

        return 1.0 - (total / len(b))
Example #3
0
 def distance(self, query, entity):
     """Return ``dense_cosine_distance`` of the two converted inputs.

     ``query`` and ``entity`` are each passed through
     ``self.bow_to_dbow`` (query first) and the two results are handed
     to ``dense_cosine_distance``, whose return value is passed back
     unchanged.
     """
     dense_query = self.bow_to_dbow(query)
     dense_entity = self.bow_to_dbow(entity)
     return dense_cosine_distance(dense_query, dense_entity)
Example #4
0
 def distance(self, query, entity):
     """Convert both inputs and return their ``dense_cosine_distance``.

     Each argument goes through ``self.bow_to_dbow`` (``query`` before
     ``entity``); the converted pair is then given to
     ``dense_cosine_distance`` and its result is returned as-is.
     """
     dense_pair = [self.bow_to_dbow(bow) for bow in (query, entity)]
     return dense_cosine_distance(*dense_pair)