예제 #1
0
    def compute(self, model):
        """Score each stemmed sentence against its query by smoothed query likelihood.

        For every query stem with non-zero collection frequency the sentence
        score accumulates::

            log((tf + mu * cf / collection_length) / (sentence_length + mu))

        which has the form of Dirichlet-prior smoothing.  Query stems whose
        collection frequency is zero are skipped.

        The frequency statistics are loaded from ``model.get_path('freq_stats')``
        the first time they are needed and kept on ``self._freq_stats``.

        :param model: object providing ``get_path``, ``load_topics`` and
            ``load_sentences``; the loaders yield ``(text, metadata)`` pairs
            whose metadata carries a ``'qid'`` key.
        :return: list of float scores, one per sentence, in iteration order.
        :raises KeyError: if a sentence refers to a ``qid`` with no topic.
        """
        if not self._freq_stats:
            self._freq_stats = IndexDump.load(model.get_path('freq_stats'))
        stats = self._freq_stats

        total_terms = stats.collection_length()

        # qid -> list of query stems
        query_terms = {
            topic_meta['qid']: topic_text.split()
            for topic_text, topic_meta in model.load_topics('topics_stem')
        }

        scores = []
        for sentence_text, sentence_meta in model.load_sentences('sentences_stem'):
            tokens = sentence_text.split()
            term_counts = collections.Counter(tokens)
            token_count = len(tokens)

            log_likelihood = 0.0
            for term in query_terms[sentence_meta['qid']]:
                coll_freq = stats.cf(term)
                if coll_freq == 0:
                    # Term unseen in the collection: no background estimate.
                    continue
                smoothed_tf = term_counts[term] + self.mu * float(coll_freq) / total_terms
                log_likelihood += math.log(float(smoothed_tf) / (token_count + self.mu))
            scores.append(log_likelihood)
        return scores
예제 #2
0
파일: mk.py 프로젝트: kepingbi/SummaryRank
    def compute(self, model):
        """Return a smoothed query-likelihood score for every stemmed sentence.

        Each sentence is compared with the stemmed query that shares its
        ``qid``.  Per query stem the contribution is::

            log((tf + mu * cf / collection_length) / (sentence_length + mu))

        (the Dirichlet-prior smoothing form); stems with a collection
        frequency of zero contribute nothing.

        ``self._freq_stats`` is populated from ``model.get_path('freq_stats')``
        on first use.

        :param model: source of the ``'topics_stem'`` and ``'sentences_stem'``
            ``(text, metadata)`` pairs and of the ``'freq_stats'`` path.
        :return: list of floats aligned with the order of the sentences.
        :raises KeyError: if a sentence's ``qid`` is not among the topics.
        """
        if not self._freq_stats:
            self._freq_stats = IndexDump.load(model.get_path('freq_stats'))

        corpus_size = self._freq_stats.collection_length()

        # Map each query id to its list of stems.
        stems_by_qid = {}
        for topic_text, topic_meta in model.load_topics('topics_stem'):
            stems_by_qid[topic_meta['qid']] = topic_text.split()

        output = []
        for raw_sentence, meta in model.load_sentences('sentences_stem'):
            words = raw_sentence.split()
            counts = collections.Counter(words)
            n_words = len(words)

            total = 0.0
            for stem in stems_by_qid[meta['qid']]:
                collection_freq = self._freq_stats.cf(stem)
                if collection_freq != 0:
                    numerator = float(
                        counts[stem] + self.mu * float(collection_freq) / corpus_size)
                    total += math.log(numerator / (n_words + self.mu))
            output.append(total)
        return output
예제 #3
0
    def compute(self, model):
        """Return one BM25 score per stemmed sentence, scored against its query.

        For each query stem that occurs in the sentence the score accumulates::

            log((N - df + 0.5) / (df + 0.5)) * tf * (k1 + 1)
                / (tf + k1 * (1 - b + b * sentence_length / avgdl))

        where ``N`` is ``self._freq_stats.num_docs()`` and ``df`` comes from
        ``self._freq_stats.df(stem)``.

        ``self._freq_stats`` is loaded from ``model.get_path('freq_stats')``
        the first time it is needed.

        :param model: object providing ``get_path``, ``load_topics`` and
            ``load_sentences``; the loaders yield ``(text, metadata)`` pairs
            whose metadata carries a ``'qid'`` key.
        :return: list of float scores, one per sentence, in iteration order.
        :raises KeyError: if a sentence refers to a ``qid`` with no topic.
        """
        result = []
        if not self._freq_stats:
            self._freq_stats = IndexDump.load(model.get_path('freq_stats'))

        N = self._freq_stats.num_docs()

        topics_stem = model.load_topics('topics_stem')
        queries = dict((m['qid'], text.split()) for text, m in topics_stem)

        for text, m in model.load_sentences('sentences_stem'):
            stems = text.split()
            sentence_tf = collections.Counter(stems)
            sentence_len = len(stems)
            score = 0.0
            for query_stem in queries[m['qid']]:
                tf = sentence_tf[query_stem]
                if tf == 0:
                    # An absent term contributes exactly zero.  Skipping it
                    # avoids a 0/0 ZeroDivisionError when the denominator
                    # collapses to zero (k1 == 0, or b == 1 with an empty
                    # sentence) and saves a needless df lookup.
                    continue
                df = self._freq_stats.df(query_stem)
                comp1 = math.log(float(N - df + 0.5) / (df + 0.5))
                comp2 = float(tf * (self.k1 + 1))
                comp3 = tf + \
                    self.k1 * (1 - self.b + float(self.b * sentence_len) / self.avgdl)
                score += comp1 * comp2 / comp3
            result.append(score)
        return result
예제 #4
0
파일: mk.py 프로젝트: kepingbi/SummaryRank
    def compute(self, model):
        """Compute a BM25 score for every stemmed sentence against its query.

        Each sentence is matched with the stemmed query sharing its ``qid``.
        Every query stem present in the sentence adds::

            idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * sentence_length / avgdl))

        with ``idf = log((N - df + 0.5) / (df + 0.5))``, ``N`` taken from
        ``self._freq_stats.num_docs()`` and ``df`` from
        ``self._freq_stats.df(stem)``.

        ``self._freq_stats`` is populated from ``model.get_path('freq_stats')``
        on first use.

        :param model: source of the ``'topics_stem'`` and ``'sentences_stem'``
            ``(text, metadata)`` pairs and of the ``'freq_stats'`` path.
        :return: list of floats aligned with the order of the sentences.
        :raises KeyError: if a sentence's ``qid`` is not among the topics.
        """
        result = []
        if not self._freq_stats:
            self._freq_stats = IndexDump.load(model.get_path('freq_stats'))

        N = self._freq_stats.num_docs()

        topics_stem = model.load_topics('topics_stem')
        queries = dict((m['qid'], text.split()) for text, m in topics_stem)

        for text, m in model.load_sentences('sentences_stem'):
            stems = text.split()
            sentence_tf = collections.Counter(stems)
            sentence_len = len(stems)
            score = 0.0
            for query_stem in queries[m['qid']]:
                term_freq = sentence_tf[query_stem]
                if not term_freq:
                    # Terms missing from the sentence add nothing to the sum.
                    # Evaluating them anyway would divide 0.0 by 0 whenever
                    # the normalisation term is zero (k1 == 0, or b == 1 and
                    # the sentence is empty), so skip them up front.
                    continue
                df = self._freq_stats.df(query_stem)
                idf = math.log(float(N - df + 0.5) / (df + 0.5))
                numerator = float(term_freq * (self.k1 + 1))
                denominator = term_freq + \
                    self.k1 * (1 - self.b + float(self.b * sentence_len) / self.avgdl)
                score += idf * numerator / denominator
            result.append(score)
        return result