Ejemplo n.º 1
0
def _get_calc_output(exploration_id, state_name, calculation_id):
    """Fetches the stored calculation output for a state, aggregated across
    all versions of the exploration.

    The record is looked up through StateAnswersCalcOutputModel using
    VERSION_ALL as the exploration version. The calculation ID is the name of
    the calculation class that produced the aggregate from submitted answers.

    Args:
        exploration_id: str. ID of the exploration.
        state_name: str. Name of the state.
        calculation_id: str. Name of the calculation class.

    Returns:
        StateAnswersCalcOutput|None. The calculation output domain object, or
            None when no matching model is found.
    """
    model = stats_models.StateAnswersCalcOutputModel.get_model(
        exploration_id, VERSION_ALL, state_name, calculation_id)
    if not model:
        return None

    # Only answer-frequency-list payloads are converted into a domain object
    # here; for any other output type the payload is left as None.
    is_answer_frequency_list = (
        model.calculation_output_type ==
        stats_domain.CALC_OUTPUT_TYPE_ANSWER_FREQUENCY_LIST)
    if is_answer_frequency_list:
        payload = stats_domain.AnswerFrequencyList.from_raw_type(
            model.calculation_output)
    else:
        payload = None

    return stats_domain.StateAnswersCalcOutput(
        exploration_id, VERSION_ALL, state_name, model.interaction_id,
        calculation_id, payload)
Ejemplo n.º 2
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Builds one answer-frequency list per classification category.

        Submitted answers are bucketed by their
        'classification_categorization' value; answers whose category is not
        in CLASSIFICATION_CATEGORIES are ignored. Each bucket is then passed
        to _get_top_answers_by_frequency.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version', 'state_name', 'interaction_id' and
                'submitted_answer_list'.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output wrapping a
            CategorizedAnswerFrequencyLists built from the buckets.
        """
        answers_by_category = collections.defaultdict(list)
        for answer_dict in state_answers_dict['submitted_answer_list']:
            category = answer_dict['classification_categorization']
            if category in CLASSIFICATION_CATEGORIES:
                answers_by_category[category].append(answer_dict['answer'])

        frequency_lists_by_category = {}
        for category, answers in answers_by_category.iteritems():
            frequency_lists_by_category[category] = (
                _get_top_answers_by_frequency(answers))

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            state_answers_dict['interaction_id'],
            self.id,
            stats_domain.CategorizedAnswerFrequencyLists(
                frequency_lists_by_category))
Ejemplo n.º 3
0
    def setUp(self):
        """Creates the canonical calc output object shared by the tests and
        confirms that it validates before any test modifies it.
        """
        super(StateAnswersCalcOutputValidationTests, self).setUp()

        canonical_output = stats_domain.StateAnswersCalcOutput(
            'exp_id', 1, 'initial_state', 'AnswerFrequencies', {})
        self.state_answers_calc_output = canonical_output

        # Sanity check: the untouched object must pass validation.
        canonical_output.validate()
Ejemplo n.º 4
0
    def setUp(self):
        """Creates the canonical calc output object shared by the tests and
        confirms that it validates before any test modifies it.
        """
        super(StateAnswersCalcOutputValidationTests, self).setUp()

        empty_frequency_list = (
            stats_domain.AnswerFrequencyList.from_raw_type([]))
        canonical_output = stats_domain.StateAnswersCalcOutput(
            'exp_id', 1, 'initial_state', 'TextInput', 'AnswerFrequencies',
            empty_frequency_list)
        self.state_answers_calc_output = canonical_output

        # Sanity check: the untouched object must pass validation.
        canonical_output.validate()
Ejemplo n.º 5
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Tallies answer occurrences separately for each classification
        category.

        For every known classification category, the submitted answer dicts
        with that 'classification_categorization' are passed to
        _count_answers, and each resulting pair is turned into a dict with
        keys 'answer' and 'frequency'. Categories that end up with no entries
        are left out of the output.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version', 'state_name' and
                'submitted_answer_list'.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output whose payload maps
            each non-empty category to its list of answer/frequency dicts.
        """
        all_answer_dicts = state_answers_dict['submitted_answer_list']
        known_categories = [
            exp_domain.EXPLICIT_CLASSIFICATION,
            exp_domain.TRAINING_DATA_CLASSIFICATION,
            exp_domain.STATISTICAL_CLASSIFICATION,
            exp_domain.DEFAULT_OUTCOME_CLASSIFICATION
        ]

        output_by_category = {}
        for category in known_categories:
            matching_answer_dicts = [
                answer_dict for answer_dict in all_answer_dicts
                if answer_dict['classification_categorization'] == category
            ]
            # Each pair is (answer dict, count); index access is kept so that
            # the shape requirements on the pairs stay the same.
            frequency_entries = [
                {'answer': pair[0]['answer'], 'frequency': pair[1]}
                for pair in _count_answers(matching_answer_dicts)
            ]
            # Categories without any entries are not reported at all.
            if frequency_entries:
                output_by_category[category] = frequency_entries

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            self.id,
            output_by_category)
Ejemplo n.º 6
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Produces the answer frequency list for all submitted answers.

        The 'answer' value of every submitted answer dict is streamed into
        _get_top_answers_by_frequency, and the result becomes the payload of
        the returned calculation output.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version', 'state_name', 'interaction_id' and
                'submitted_answer_list'.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output carrying the
            frequency list returned by the helper.
        """
        submitted = state_answers_dict['submitted_answer_list']
        # Kept as a lazy generator so the helper receives the same kind of
        # iterable as before.
        answers = (entry['answer'] for entry in submitted)
        frequencies = _get_top_answers_by_frequency(answers)

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            state_answers_dict['interaction_id'],
            self.id,
            frequencies)
Ejemplo n.º 7
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Produces the frequency list of the most common answers.

        Delegates to _calculate_top_answer_frequencies, passing 10 as its
        second argument (the number of top answers to keep), and wraps the
        result in a calculation output object.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version' and 'state_name', plus whatever the
                helper reads from it.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output carrying the helper's
            result as its payload.
        """
        top_answer_frequencies = _calculate_top_answer_frequencies(
            state_answers_dict, 10)

        exploration_id = state_answers_dict['exploration_id']
        exploration_version = state_answers_dict['exploration_version']
        state_name = state_answers_dict['state_name']
        return stats_domain.StateAnswersCalcOutput(
            exploration_id, exploration_version, state_name, self.id,
            top_answer_frequencies)
Ejemplo n.º 8
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Produces the frequency list of the most common individual elements
        found across all submitted answer sets.

        Every submitted 'answer' is treated as an iterable of elements. The
        elements of all answers are streamed, in order, into
        _get_top_answers_by_frequency with limit=10.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version', 'state_name', 'interaction_id' and
                'submitted_answer_list'.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output carrying the
            frequency list returned by the helper.
        """
        submitted = state_answers_dict['submitted_answer_list']
        # Lazily flatten the answer sets into a single stream of elements.
        individual_elements = (
            element
            for entry in submitted
            for element in entry['answer'])
        frequencies = _get_top_answers_by_frequency(
            individual_elements, limit=10)

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            state_answers_dict['interaction_id'],
            self.id,
            frequencies)
Ejemplo n.º 9
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Tallies how often each submitted answer occurs.

        The submitted answer list is handed to _count_answers, and every
        resulting (answer dict, count) pair is converted into a dict with
        keys 'answer' and 'frequency'.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version', 'state_name' and
                'submitted_answer_list'.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output whose payload is the
            list of answer/frequency dicts, in the order produced by
            _count_answers.
        """
        counted_pairs = _count_answers(
            state_answers_dict['submitted_answer_list'])
        frequency_entries = [
            {'answer': pair[0]['answer'], 'frequency': pair[1]}
            for pair in counted_pairs
        ]

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            self.id,
            frequency_entries)
Ejemplo n.º 10
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Computes the most frequent unresolved answers for a state.

        Each submitted answer is reduced to its 'answer' and
        'classification_categorization' fields, and the reduced list is passed
        to _get_top_unresolved_answers_by_frequency, which does the filtering
        and counting.

        This method is run within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. A dict containing state answers and
                exploration information such as:
                * exploration_id: id of the exploration.
                * exploration_version: Specific version of the exploration, or
                    VERSION_ALL if answers are aggregated across multiple
                    versions.
                * state_name: Name of the state.
                * interaction_id: id of the interaction.
                * submitted_answer_list: A list of submitted answers.
                    NOTE: The answers in this list must be sorted in
                    chronological order of their submission.

        Returns:
            stats_domain.StateAnswersCalcOutput. A calculation output object
            containing the list of top unresolved answers, in descending
            order of frequency (at most feconf.TOP_UNRESOLVED_ANSWERS_LIMIT
            answers).
        """
        # Keep only the two fields the helper needs, preserving submission
        # order.
        classified_answers = []
        for submitted in state_answers_dict['submitted_answer_list']:
            classified_answers.append({
                'answer': submitted['answer'],
                'classification_categorization': (
                    submitted['classification_categorization']),
            })

        top_unresolved = _get_top_unresolved_answers_by_frequency(
            classified_answers, limit=feconf.TOP_UNRESOLVED_ANSWERS_LIMIT)

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            state_answers_dict['interaction_id'],
            self.id,
            top_unresolved)
Ejemplo n.º 11
0
    def calculate_from_state_answers_dict(self, state_answers_dict):
        """Counts how often each element occurs across all submitted answers
        and keeps the 10 most frequent elements.

        Every submitted 'answer' is treated as a collection of elements. The
        result is a list of dicts ordered by descending frequency; each dict
        has keys 'answer' (holding the element) and 'frequency'.

        This method is run from within the context of a MapReduce job.

        Args:
            state_answers_dict: dict. Must provide the keys 'exploration_id',
                'exploration_version', 'state_name' and
                'submitted_answer_list'.

        Returns:
            stats_domain.StateAnswersCalcOutput. Output whose payload is the
            list of at most 10 answer/frequency dicts.
        """
        flattened_elements = []
        for answer_dict in state_answers_dict['submitted_answer_list']:
            flattened_elements.extend(answer_dict['answer'])

        # most_common(10) yields the same pairs, in the same order, as a
        # stable descending sort by count truncated to ten entries.
        top_element_counts = (
            collections.Counter(flattened_elements).most_common(10))

        # The element is stored under the key 'answer' so that it gets
        # displayed correctly by the FrequencyTable visualization.
        frequency_entries = [
            {'answer': element, 'frequency': count}
            for element, count in top_element_counts
        ]

        return stats_domain.StateAnswersCalcOutput(
            state_answers_dict['exploration_id'],
            state_answers_dict['exploration_version'],
            state_answers_dict['state_name'],
            self.id,
            frequency_entries)
Ejemplo n.º 12
0
    def get_calc_output(cls,
                        exploration_id,
                        state_name,
                        calculation_id,
                        exploration_version=VERSION_ALL):
        """Fetches the stored calculation output for a state of an
        exploration.

        The record is read from StateAnswersCalcOutputModel in the data store.
        This aggregator does not have a real-time layer, so the returned
        results may be out of date. The calculation ID is the name of the
        calculation class used to compute aggregate data from submitted user
        answers.

        Args:
            exploration_id: ID of the exploration.
            state_name: Name of the state.
            calculation_id: Name of the calculation class.
            exploration_version: Version of the exploration to look up. With
                the default, VERSION_ALL, the output aggregated over all
                versions of the state and exploration is returned.

        Returns:
            The StateAnswersCalcOutput domain object built from the stored
            model, or None if no matching model is found.
        """
        stored_model = stats_models.StateAnswersCalcOutputModel.get_model(
            exploration_id, exploration_version, state_name, calculation_id)
        if not stored_model:
            return None

        return stats_domain.StateAnswersCalcOutput(
            exploration_id,
            exploration_version,
            state_name,
            calculation_id,
            stored_model.calculation_output)