def _get_calc_output(exploration_id, state_name, calculation_id):
    """Fetches the stored calculation output for a state, across all versions.

    The lookup is always made with VERSION_ALL, so the result is the output
    aggregated over every version of the given state and exploration. The
    calculation ID is the name of the calculation class that computed the
    aggregate data from submitted user answers.

    Args:
        exploration_id: str. ID of the exploration.
        state_name: str. Name of the state.
        calculation_id: str. Name of the calculation class.

    Returns:
        StateAnswersCalcOutput|None. The calculation output domain object, or
        None when get_model() yields a falsy value.
    """
    stored_model = stats_models.StateAnswersCalcOutputModel.get_model(
        exploration_id, VERSION_ALL, state_name, calculation_id)
    if not stored_model:
        return None

    # Only answer-frequency-list outputs are converted here; for any other
    # stored output type the domain object is built with None as its output.
    is_answer_frequency_list = (
        stored_model.calculation_output_type ==
        stats_domain.CALC_OUTPUT_TYPE_ANSWER_FREQUENCY_LIST)
    parsed_output = (
        stats_domain.AnswerFrequencyList.from_raw_type(
            stored_model.calculation_output)
        if is_answer_frequency_list else None)

    return stats_domain.StateAnswersCalcOutput(
        exploration_id, VERSION_ALL, state_name,
        stored_model.interaction_id, calculation_id, parsed_output)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Counts answer occurrences separately per classification category.

    Answers whose 'classification_categorization' is not one of
    CLASSIFICATION_CATEGORIES are left out of the result.

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version', 'state_name', 'interaction_id' and
            'submitted_answer_list'. Each submitted answer must provide
            the keys 'answer' and 'classification_categorization'.

    Returns:
        stats_domain.StateAnswersCalcOutput. Wraps a
        CategorizedAnswerFrequencyLists keyed by classification category.
    """
    answers_by_category = collections.defaultdict(list)
    # groupby() only groups runs of consecutive equal keys, so the same
    # category may show up more than once; extending the defaultdict entry
    # merges those runs back together.
    consecutive_runs = itertools.groupby(
        state_answers_dict['submitted_answer_list'],
        operator.itemgetter('classification_categorization'))
    for category, run_of_answer_dicts in consecutive_runs:
        if category not in CLASSIFICATION_CATEGORIES:
            continue
        answers_by_category[category].extend(
            answer_dict['answer'] for answer_dict in run_of_answer_dicts)

    frequency_lists_by_category = {}
    for category in answers_by_category:
        frequency_lists_by_category[category] = (
            _get_top_answers_by_frequency(answers_by_category[category]))
    categorized_frequency_lists = (
        stats_domain.CategorizedAnswerFrequencyLists(
            frequency_lists_by_category))

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        state_answers_dict['interaction_id'],
        self.id,
        categorized_frequency_lists)
def setUp(self):
    """Creates the canonical calc output fixture and checks it validates."""
    super(StateAnswersCalcOutputValidationTests, self).setUp()

    canonical_calc_output = stats_domain.StateAnswersCalcOutput(
        'exp_id', 1, 'initial_state', 'AnswerFrequencies', {})
    self.state_answers_calc_output = canonical_calc_output

    # Sanity check: the unmodified fixture must pass validation, so that
    # a later validation failure can be attributed to the test's changes.
    canonical_calc_output.validate()
def setUp(self):
    """Creates the canonical calc output fixture and checks it validates."""
    super(StateAnswersCalcOutputValidationTests, self).setUp()

    # The fixture's calculation output is an AnswerFrequencyList built
    # from an empty raw list.
    empty_frequency_list = (
        stats_domain.AnswerFrequencyList.from_raw_type([]))
    canonical_calc_output = stats_domain.StateAnswersCalcOutput(
        'exp_id', 1, 'initial_state', 'TextInput', 'AnswerFrequencies',
        empty_frequency_list)
    self.state_answers_calc_output = canonical_calc_output

    # Sanity check: the unmodified fixture must pass validation, so that
    # a later validation failure can be attributed to the test's changes.
    canonical_calc_output.validate()
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Counts answer occurrences separately per classification category.

    The categories considered are the explicit, training data, statistical
    and default outcome classifications from exp_domain. Categories with
    no counted answers are left out of the output.

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version', 'state_name' and
            'submitted_answer_list'. Each submitted answer must provide
            the key 'classification_categorization'.

    Returns:
        stats_domain.StateAnswersCalcOutput. Its calculation output is a
        dict mapping each category to a list of dicts with the keys
        'answer' and 'frequency'.
    """
    categories = [
        exp_domain.EXPLICIT_CLASSIFICATION,
        exp_domain.TRAINING_DATA_CLASSIFICATION,
        exp_domain.STATISTICAL_CLASSIFICATION,
        exp_domain.DEFAULT_OUTCOME_CLASSIFICATION
    ]
    all_answer_dicts = state_answers_dict['submitted_answer_list']

    frequencies_by_category = {}
    for category in categories:
        matching_answer_dicts = [
            answer_dict for answer_dict in all_answer_dicts
            if answer_dict['classification_categorization'] == category
        ]
        # Each counted pair is indexed as (answer dict, frequency).
        frequency_entries = [
            {'answer': pair[0]['answer'], 'frequency': pair[1]}
            for pair in _count_answers(matching_answer_dicts)
        ]
        # Only categories that actually produced entries are reported.
        if frequency_entries:
            frequencies_by_category[category] = frequency_entries

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        self.id,
        frequencies_by_category)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Computes how often each submitted answer occurs.

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version', 'state_name', 'interaction_id' and
            'submitted_answer_list'. Each submitted answer must provide
            the key 'answer'.

    Returns:
        stats_domain.StateAnswersCalcOutput. Wraps whatever
        _get_top_answers_by_frequency() produces for the submitted
        answers.
    """
    submitted_answers = (
        submitted['answer']
        for submitted in state_answers_dict['submitted_answer_list'])
    frequencies = _get_top_answers_by_frequency(submitted_answers)

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        state_answers_dict['interaction_id'],
        self.id,
        frequencies)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Computes answer frequencies, limited to the top 10 answers.

    The counting and the limit of 10 are delegated to
    _calculate_top_answer_frequencies().

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version' and 'state_name'; the whole dict is
            also handed to _calculate_top_answer_frequencies().

    Returns:
        stats_domain.StateAnswersCalcOutput. Wraps the helper's result.
    """
    top_answer_frequencies = _calculate_top_answer_frequencies(
        state_answers_dict, 10)

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        self.id,
        top_answer_frequencies)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Computes frequencies of individual answers across all answer sets.

    Every submitted answer is itself iterated, and its members are
    counted individually; the result is limited to the top 10.

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version', 'state_name', 'interaction_id' and
            'submitted_answer_list'. Each submitted answer must provide
            an iterable under the key 'answer'.

    Returns:
        stats_domain.StateAnswersCalcOutput. Wraps whatever
        _get_top_answers_by_frequency() produces with limit=10.
    """
    submitted_answer_dicts = state_answers_dict['submitted_answer_list']
    # Lazily flatten the answer sets into one stream of members.
    individual_answers = (
        member
        for submitted in submitted_answer_dicts
        for member in submitted['answer'])
    top_frequencies = _get_top_answers_by_frequency(
        individual_answers, limit=10)

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        state_answers_dict['interaction_id'],
        self.id,
        top_frequencies)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Computes how often each submitted answer occurs.

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version', 'state_name' and
            'submitted_answer_list'.

    Returns:
        stats_domain.StateAnswersCalcOutput. Its calculation output is a
        list of dicts, each with the keys 'answer' and 'frequency'.
    """
    counted_pairs = _count_answers(
        state_answers_dict['submitted_answer_list'])
    # Each counted pair is indexed as (answer dict, frequency).
    frequency_entries = [
        {'answer': pair[0]['answer'], 'frequency': pair[1]}
        for pair in counted_pairs
    ]

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        self.id,
        frequency_entries)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Filters unresolved answers and counts how often each one occurs.

    This method is run within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. A dict containing state answers and
            exploration information such as:
            * exploration_id: id of the exploration.
            * exploration_version: Specific version of the exploration, or
                VERSION_ALL if answers are aggregated across multiple
                versions.
            * state_name: Name of the state.
            * interaction_id: id of the interaction.
            * submitted_answer_list: A list of submitted answers.
                NOTE: The answers in this list must be sorted in
                chronological order of their submission.

    Returns:
        stats_domain.StateAnswersCalcOutput. A calculation output object
        containing the list of top unresolved answers, in descending order
        of frequency (at most feconf.TOP_UNRESOLVED_ANSWERS_LIMIT answers).
    """
    # Pass along only the two fields that are read here from each
    # submitted answer.
    classified_answers = []
    for submitted in state_answers_dict['submitted_answer_list']:
        classified_answers.append({
            'answer': submitted['answer'],
            'classification_categorization': (
                submitted['classification_categorization'])
        })

    top_unresolved_answers = _get_top_unresolved_answers_by_frequency(
        classified_answers, limit=feconf.TOP_UNRESOLVED_ANSWERS_LIMIT)

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        state_answers_dict['interaction_id'],
        self.id,
        top_unresolved_answers)
def calculate_from_state_answers_dict(self, state_answers_dict):
    """Computes element frequencies across all answers, keeping the top 10.

    Every submitted answer is iterated and its elements are counted
    individually; only the 10 most frequent elements are kept.

    This method is run from within the context of a MapReduce job.

    Args:
        state_answers_dict: dict. Must provide the keys 'exploration_id',
            'exploration_version', 'state_name' and
            'submitted_answer_list'. Each submitted answer must provide
            an iterable of hashable elements under the key 'answer'.

    Returns:
        stats_domain.StateAnswersCalcOutput. Its calculation output is a
        list of dicts, each with the keys 'answer' and 'frequency', in
        descending order of frequency.
    """
    element_counts = collections.Counter(
        element
        for answer_dict in state_answers_dict['submitted_answer_list']
        for element in answer_dict['answer'])

    ranked_pairs = sorted(
        element_counts.items(), key=lambda pair: pair[1], reverse=True)
    # Slicing is a no-op when there are 10 or fewer distinct elements.
    top_pairs = ranked_pairs[:10]

    # Each element is stored under the key 'answer' so that it gets
    # displayed correctly by the FrequencyTable visualization.
    frequency_entries = [
        {'answer': element, 'frequency': count}
        for element, count in top_pairs
    ]

    return stats_domain.StateAnswersCalcOutput(
        state_answers_dict['exploration_id'],
        state_answers_dict['exploration_version'],
        state_answers_dict['state_name'],
        self.id,
        frequency_entries)
def get_calc_output(cls, exploration_id, state_name, calculation_id,
                    exploration_version=VERSION_ALL):
    """Fetches the stored calculation output for a state.

    The output is read from a StateAnswersCalcOutputModel instance in the
    data store. This aggregator does not have a real-time layer, which
    means the results from this function may be out of date.

    The calculation ID comes from the name of the calculation class used
    to compute aggregate data from submitted user answers.

    Args:
        exploration_id: ID of the exploration.
        state_name: Name of the state.
        calculation_id: Name of the calculation class.
        exploration_version: Version of the exploration to look up. With
            the default, VERSION_ALL, the output aggregated over all
            versions of the state and exploration is returned.

    Returns:
        StateAnswersCalcOutput|None. The calculation output domain object,
        or None when get_model() yields a falsy value.
    """
    stored_model = stats_models.StateAnswersCalcOutputModel.get_model(
        exploration_id, exploration_version, state_name, calculation_id)
    if not stored_model:
        return None

    return stats_domain.StateAnswersCalcOutput(
        exploration_id, exploration_version, state_name, calculation_id,
        stored_model.calculation_output)