コード例 #1
0
 def test_aggregate_max(self):
     """Expect ``AggregationMethods.max`` to report the largest score per word.

     The fixture holds three lists of ``(word, score)`` pairs (presumably
     one list per document). "foo" scores 0.1, 0.3 and 0.5 and "bar"
     scores 0.6 once, so the expected output is "foo" at 0.5 followed by
     "bar" at 0.6.
     """
     per_document = [
         [("foo", 0.1)],
         [("foo", 0.3), ("bar", 0.6)],
         [("foo", 0.5)],
     ]
     expected = [("foo", 0.5), ("bar", 0.6)]

     aggregated = AggregationMethods.max(per_document)

     self.assertEqual(aggregated[0], expected[0])
     self.assertEqual(aggregated[1], expected[1])
コード例 #2
0
 def test_aggregate_mean(self):
     """Expect ``AggregationMethods.mean`` to average scores over all lists.

     The expected values are 0.3 for "foo" ((0.1 + 0.3 + 0.5) / 3) and 0.2
     for "bar" (0.6 / 3), i.e. "bar" is divided by all three input lists
     even though it appears in only one of them. Scores are compared with
     ``assertAlmostEqual`` because they are floating-point averages.
     """
     per_document = [
         [("foo", 0.1)],
         [("foo", 0.3), ("bar", 0.6)],
         [("foo", 0.5)],
     ]

     averaged = AggregationMethods.mean(per_document)

     # Word order first, then the averaged values.
     self.assertEqual(averaged[0][0], "foo")
     self.assertEqual(averaged[1][0], "bar")
     self.assertAlmostEqual(averaged[0][1], 0.3)
     self.assertAlmostEqual(averaged[1][1], 0.2)
コード例 #3
0
    def test_aggregate(self):
        """Exercise ``AggregationMethods.aggregate`` with each method constant.

        The same fixture is aggregated with MEAN, MEDIAN, MIN and MAX. MEAN
        is checked with ``assertAlmostEqual`` on the scores; the other three
        are checked for exact ``(word, score)`` equality.
        """
        per_document = [
            [("foo", 0.1)],
            [("foo", 0.3), ("bar", 0.6)],
            [("foo", 0.5)],
        ]

        # MEAN yields floating-point averages, so compare approximately.
        averaged = AggregationMethods.aggregate(per_document,
                                                AggregationMethods.MEAN)
        self.assertEqual(averaged[0][0], "foo")
        self.assertEqual(averaged[1][0], "bar")
        self.assertAlmostEqual(averaged[0][1], 0.3)
        self.assertAlmostEqual(averaged[1][1], 0.2)

        # The remaining methods are expected to return input scores
        # unchanged, so exact tuple comparison is used.
        exact_cases = (
            (AggregationMethods.MEDIAN, ("foo", 0.3), ("bar", 0.6)),
            (AggregationMethods.MIN, ("foo", 0.1), ("bar", 0.6)),
            (AggregationMethods.MAX, ("foo", 0.5), ("bar", 0.6)),
        )
        for method, expected_first, expected_second in exact_cases:
            aggregated = AggregationMethods.aggregate(per_document, method)
            self.assertEqual(aggregated[0], expected_first)
            self.assertEqual(aggregated[1], expected_second)
コード例 #4
0
def run(corpus: Optional[Corpus], words: Optional[List], cached_keywords: Dict,
        scoring_methods: Set, scoring_methods_kwargs: Dict, agg_method: int,
        state: TaskState) -> Results:
    """Score keywords with the selected methods and build the result table.

    Args:
        corpus: Corpus to score. A falsy value yields empty results.
        words: Optional words used to filter the score table. Before
            filtering they are passed through the ``normalizer`` of every
            ``BaseNormalizer`` found in
            ``corpus.used_preprocessor.preprocessors``.
        cached_keywords: Previously computed keywords keyed by method name.
            The same dict object is stored on the result and extended in
            place, so keywords computed before an interruption are kept.
        scoring_methods: Names of the scoring methods to apply. Only names
            that also occur in ``ScoringMethods.ITEMS`` are used.
        scoring_methods_kwargs: Extra keyword arguments per method name,
            forwarded to the scoring function.
        agg_method: Aggregation selector handed to
            ``AggregationMethods.aggregate``.
        state: Task state used to report progress and status and to check
            for interruption requests.

    Returns:
        A ``Results`` object. ``labels`` lists the applied method names in
        ``ScoringMethods.ITEMS`` order, ``all_keywords`` is
        ``cached_keywords``, and ``scores`` is a list of rows
        ``[word, score_for_label_0, ...]`` sorted by the first label's
        score (descending) and then by word (ascending). ``scores`` and
        ``labels`` are empty when there is no corpus, no scoring method is
        selected, or none of the selected names is a known method.

    Raises:
        Exception: Raised from the progress callback when ``state`` reports
            that an interruption was requested.
    """
    results = Results(scores=[], labels=[], all_keywords={})
    if not corpus:
        return results

    # passed by reference (and not copied) - to save partial results
    results.all_keywords = cached_keywords
    if not scoring_methods:
        return results

    def callback(i: float, status=""):
        # ``i`` is a fraction in [0, 1]; the task state expects percent.
        state.set_progress_value(i * 100)
        if status:
            state.set_status(status)
        if state.is_interruption_requested():
            raise Exception

    callback(0, "Calculating...")
    scores = {}
    tokens = corpus.tokens
    documents = corpus.documents
    # Each selected method gets an equal share of the progress bar.
    step = 1 / len(scoring_methods)
    for method_name, func in ScoringMethods.ITEMS:
        if method_name not in scoring_methods:
            continue

        if method_name not in results.all_keywords:
            # Not cached yet: compute and store the keywords for this method.
            i = len(results.labels)
            cb = wrap_callback(callback,
                               start=i * step,
                               end=(i + 1) * step)

            needs_tokens = method_name in ScoringMethods.TOKEN_METHODS
            kw = {"progress_callback": cb}
            kw.update(scoring_methods_kwargs.get(method_name, {}))

            keywords = func(tokens if needs_tokens else documents, **kw)
            results.all_keywords[method_name] = keywords

        keywords = results.all_keywords[method_name]
        scores[method_name] = \
            dict(AggregationMethods.aggregate(keywords, agg_method))

        results.labels.append(method_name)

    # None of the requested names matched a known scoring method: there is
    # no column to sort by, so return the empty results instead of failing
    # with IndexError on ``results.labels[0]``.
    if not results.labels:
        return results

    # Rows are words, columns are method names (in ``results.labels`` order).
    scores = pd.DataFrame(scores)
    if words:

        # Normalize words
        for preprocessor in corpus.used_preprocessor.preprocessors:
            if isinstance(preprocessor, BaseNormalizer):
                words = [preprocessor.normalizer(w) for w in words]

        # Filter scores using words
        existing_words = [w for w in set(words) if w in scores.index]
        scores = scores.loc[existing_words] if existing_words \
            else scores.iloc[:0]

    # ``reset_index`` turns the word index into a column named "index";
    # sort by the first method's score (descending), ties by word.
    results.scores = scores.reset_index().sort_values(
        by=[results.labels[0], "index"], ascending=[False,
                                                    True]).values.tolist()

    return results