Ejemplo n.º 1
0
 def test_calculation_output_must_be_less_than_one_million_bytes(self):
     """Checks that an oversized calculation output is reported as a
     validation error.
     """
     # The same occurrence is repeated 200000 times to build the oversized
     # output.
     repeated_occurrence = stats_domain.AnswerOccurrence(
         'This is not a long sentence.', 1)
     oversized_frequency_list = stats_domain.AnswerFrequencyList(
         [repeated_occurrence] * 200000)

     calc_output = self.state_answers_calc_output
     calc_output.calculation_output = oversized_frequency_list

     expected_error = 'calculation_output is too big to be stored'
     self._assert_validation_error(calc_output, expected_error)
Ejemplo n.º 2
0
 def test_convert_list_to_raw_object(self):
     """Checks that to_raw_type() on an AnswerFrequencyList yields one
     answer/frequency dict per occurrence, in the original order.
     """
     frequency_list = stats_domain.AnswerFrequencyList(
         [self.ANSWER_A, self.ANSWER_B])
     expected_raw_list = [
         {'answer': 'answer a', 'frequency': 3},
         {'answer': 'answer b', 'frequency': 2},
     ]
     self.assertEqual(frequency_list.to_raw_type(), expected_raw_list)
Ejemplo n.º 3
0
 def test_convert_list_to_raw_object(self):
     """Checks that to_raw_type() on CategorizedAnswerFrequencyLists yields
     a dict mapping each category to its list of answer/frequency dicts.
     """
     lists_by_category = {
         'category a': stats_domain.AnswerFrequencyList([self.ANSWER_A]),
         'category b': stats_domain.AnswerFrequencyList(
             [self.ANSWER_B, self.ANSWER_C]),
     }
     categorized_lists = stats_domain.CategorizedAnswerFrequencyLists(
         lists_by_category)

     expected_raw_dict = {
         'category a': [
             {'answer': 'answer a', 'frequency': 3},
         ],
         'category b': [
             {'answer': 'answer b', 'frequency': 2},
             {'answer': 'answer c', 'frequency': 1},
         ],
     }
     self.assertEqual(categorized_lists.to_raw_type(), expected_raw_dict)
Ejemplo n.º 4
0
def _get_top_unresolved_answers_by_frequency(answers_with_classification,
                                             limit=None):
    """Computes the list of unresolved answers by keeping track of their latest
    classification categorization and then computes the occurrences of each
    unresolved answer, keeping only limit answers, and returns an
    AnswerFrequencyList.

    This method is run from within the context of a MapReduce job.

    Args:
        answers_with_classification: iterable(*). The collection of answers
            with their corresponding classification categorization. Each item
            must support lookup by the keys 'answer' and
            'classification_categorization'. Items are expected to be ordered
            by submission time, so that a later item overrides the
            categorization recorded for an earlier, equal answer.
        limit: int or None. The maximum number of answers to return. When None,
            all answers are returned.

    Returns:
        stats_domain.AnswerFrequencyList. A list of the top "limit"
        unresolved answers, ordered by decreasing frequency.
    """
    classification_results_dict = {}

    # The list of answers is sorted according to the time of answer submission.
    # Thus following loop goes through the list and aggregates the most recent
    # classification categorization of each answer, while counting every
    # submission of that answer (whatever its categorization was at the time).
    for ans in answers_with_classification:
        # Build the hashable key a single time per answer instead of once per
        # dict access.
        hashable_answer = HashableAnswer(ans['answer'])
        previous_result = classification_results_dict.get(hashable_answer)
        frequency = (
            0 if previous_result is None else previous_result['frequency'])
        classification_results_dict[hashable_answer] = {
            'classification_categorization': (
                ans['classification_categorization']),
            'frequency': frequency + 1
        }

    # Only answers whose latest categorization is an unresolved one are kept.
    unresolved_answers_with_frequency_list = [
        {
            'answer': hashable_answer.answer,
            'frequency': result['frequency']
        }
        for hashable_answer, result in classification_results_dict.items()
        if result['classification_categorization'] in (
            UNRESOLVED_ANSWER_CLASSIFICATION_CATEGORIES)
    ]

    # list.sort() is stable, so answers with equal frequency keep the relative
    # order in which the dict yielded them.
    unresolved_answers_with_frequency_list.sort(
        key=lambda item: item['frequency'], reverse=True)

    return stats_domain.AnswerFrequencyList([
        stats_domain.AnswerOccurrence(item['answer'], item['frequency'])
        for item in unresolved_answers_with_frequency_list[:limit]
    ])
Ejemplo n.º 5
0
def _get_top_answers_by_frequency(answers, limit=None):
    """Tallies how often each answer occurs and returns the most frequent
    ones as an AnswerFrequencyList.

    This method is run from within the context of a MapReduce job.

    Args:
        answers: iterable(*). The collection of answers to be tallied.
        limit: int or None. The maximum number of answers to return. When None,
            all answers are returned.

    Returns:
        stats_domain.AnswerFrequencyList. A list of the top "limit" answers.
    """
    # Answers are wrapped so that they can be used as counter keys.
    wrapped_answers = (_HashableAnswer(answer) for answer in answers)
    tally = utils.OrderedCounter(wrapped_answers)

    top_occurrences = []
    for wrapped_answer, count in tally.most_common(limit):
        top_occurrences.append(
            stats_domain.AnswerOccurrence(wrapped_answer.answer, count))
    return stats_domain.AnswerFrequencyList(top_occurrences)
Ejemplo n.º 6
0
 def test_defaults_to_empty_list(self):
     """Checks that an AnswerFrequencyList built without arguments holds no
     answer occurrences.
     """
     default_list = stats_domain.AnswerFrequencyList()
     occurrence_count = len(default_list.answer_occurrences)
     self.assertEqual(occurrence_count, 0)
Ejemplo n.º 7
0
 def test_has_correct_type(self):
     """Checks that an AnswerFrequencyList reports the answer-frequency-list
     calculation output type.
     """
     empty_list = stats_domain.AnswerFrequencyList([])
     expected_type = stats_domain.CALC_OUTPUT_TYPE_ANSWER_FREQUENCY_LIST
     self.assertEqual(empty_list.calculation_output_type, expected_type)