def enrich_json_objects_by_object_type(request, value):
    """
    Take the given value and start enrichment by object_type.

    Args:
        request (django.http.request.HttpRequest): request which is currently processed
        value (dict|list|django.db.models.Model): in case of
            django.db.models.Model object (or list of these objects),
            the to_json method is invoked

    Returns:
        dict|list
    """
    time_start_globally = time()
    # Normalize the input into plain JSON-like data first.
    if isinstance(value, list):
        json = [member.to_json() if hasattr(member, "to_json") else member for member in value]
    elif isinstance(value, dict):
        json = value
    else:
        json = value.to_json()
    objects, nested = _collect_json_objects(json, by='object_type')
    for enricher_info in _get_OBJECT_TYPE_ENRICHER_ORDER():
        wanted_types = enricher_info['object_types']
        if wanted_types:
            # The enricher is restricted to particular object types.
            enricher_objects = flatten([objects.get(object_type, []) for object_type in wanted_types])
            enricher_nested = any(nested.get(object_type, False) for object_type in wanted_types)
        else:
            # No restriction: the enricher processes everything collected.
            enricher_objects = flatten(objects.values())
            enricher_nested = any(nested.values())
        if not enricher_objects:
            continue
        time_start = time()
        enricher_info['enricher'](request, enricher_objects, enricher_nested)
        LOGGER.debug('enrichment "{}" took {} seconds'.format(
            enricher_info['enricher_name'], time() - time_start))
        if not enricher_info['pure']:
            # if the enricher modified object types we must collect objects
            # again
            objects, nested = _collect_json_objects(json, by='object_type')
    LOGGER.debug('The whole enrichment of json objects by their object_type took {} seconds.'.format(time() - time_start_globally))
    return json
def _get_OBJECT_TYPE_ENRICHER_ORDER():
    """Return the registered object-type enrichers in dependency order.

    The result is a topological order of the enricher dependency graph with
    ties broken by the enrichers' 'priority'.  It is computed lazily once,
    cached in the module-level _OBJECT_TYPE_ENRICHER_ORDER, and the whole
    computation is guarded by _OBJECT_TYPE_ENRICHERS_LOCK.

    Returns:
        list: enricher info dicts, every enricher after all its dependencies

    Raises:
        Exception: when there is a cycle in the enricher dependencies
    """
    with _OBJECT_TYPE_ENRICHERS_LOCK:
        global _OBJECT_TYPE_ENRICHER_ORDER
        if _OBJECT_TYPE_ENRICHER_ORDER is None:
            visited = set()
            stack = set()
            order = []
            enrichers = _OBJECT_TYPE_ENRICHERS
            refs = set(flatten([enricher_info['dependencies'] for enricher_info in enrichers.values()]))
            # Roots are enrichers nobody depends on; the DFS starts there.
            roots = set(enrichers.keys()) - refs

            def _visit(enricher_info):
                name = enricher_info['enricher_name']
                # BUG FIX: check the recursion stack BEFORE the visited set.
                # The original marked a node visited before recursing into its
                # dependencies and tested 'visited' first, so a cycle always
                # hit the early return and the exception below never fired.
                if name in stack:
                    raise Exception('There is a cycle in dependencies of enrichers.')
                if name in visited:
                    return
                stack.add(name)
                for enricher_dep in sorted(enricher_info['dependencies'],
                                           key=lambda dep: enrichers[dep]['priority']):
                    _visit(enrichers[enricher_dep])
                stack.remove(name)
                visited.add(name)
                # Dependencies were appended first, so 'order' is topological.
                order.append(enricher_info)

            for enricher_name, enricher_info in sorted(enrichers.items(), key=lambda x: x[1]['priority']):
                if enricher_name in roots:
                    _visit(enricher_info)
            # NOTE: enrichers reachable only through a cycle (no root leads to
            # them) are silently dropped, as in the original implementation.
            # The original also re-sorted 'order' by its own enumeration
            # index, which was an identity operation and has been removed.
            _OBJECT_TYPE_ENRICHER_ORDER = order
    return _OBJECT_TYPE_ENRICHER_ORDER
def _load_item_relations(self, data, db_objects, categories_json_key):
    """Build the category parent subgraph for loaded objects and persist it.

    Args:
        data: list of loaded JSON objects; each must carry an "id" key and at
            least one localized "name-xx" key (assumed non-empty -- TODO
            confirm against callers)
        db_objects: mapping of (identifier, language) -> saved object exposing
            an ``item_id`` attribute -- presumably produced by the loader;
            verify against caller
        categories_json_key (str): JSON key listing parent category identifiers
    """
    print("\nFilling item types")
    call_command('fill_item_types')
    print("\nBuilding dependencies")
    parent_subgraph = {}
    lang_intersect = None
    for json_object in progress.bar(data, every=max(1, len(data) / 100)):
        # The language is not important here.
        langs = [
            k[-2:] for k in json_object.keys()
            if re.match(r'^name-\w\w$', k)
        ]
        # Track the languages common to all objects; one of them is used
        # later to translate the collected identifiers.
        lang_intersect = set(
            langs
        ) if lang_intersect is None else lang_intersect & set(langs)
        lang = langs[0]
        db_object = db_objects[json_object["id"], lang]
        parent_items = parent_subgraph.get(db_object.item_id, set())
        for parent in json_object.get(categories_json_key, []):
            parent_items.add('proso_flashcards_category/{}'.format(parent))
        parent_subgraph[db_object.item_id] = parent_items
    # NOTE(review): this raises if ``data`` is empty (lang_intersect stays
    # None) or if the objects share no common language -- confirm callers
    # guarantee both.
    lang = lang_intersect.pop()
    translated = Item.objects.translate_identifiers(
        flatten(parent_subgraph.values()), lang)
    Item.objects.override_parent_subgraph({
        item: [translated[parent] for parent in parents]
        for item, parents in parent_subgraph.items()
    })
def _load_item_relations(self, data, db_objects, categories_json_key):
    """Build the parent subgraph (categories, context, terms) and persist it."""
    # Collapse the (identifier, language) keyed mapping into a mapping by
    # identifier only, remembering every language the object was seen in.
    by_identifier = {}
    for (identifier, lang), db_object in db_objects.items():
        _, found_langs = by_identifier.get(identifier, (None, []))
        by_identifier[identifier] = db_object, found_langs + [lang]
    print("\nFilling item types")
    call_command('fill_item_types')
    print("\nBuilding dependencies")
    parent_subgraph = {}
    lang_intersect = None
    for json_object in progress.bar(data, every=max(1, len(data) / 100)):
        db_object, langs = by_identifier[json_object["id"]]
        # The language is not important here.
        lang_intersect = set(langs) if lang_intersect is None else lang_intersect & set(langs)
        parents = parent_subgraph.setdefault(db_object.item_id, set())
        for parent in json_object.get(categories_json_key, []):
            parents.add('proso_flashcards_category/{}'.format(parent))
        if 'context' in json_object:
            parents.add('proso_flashcards_context/{}'.format(json_object['context']))
        if 'term' in json_object:
            parents.add('proso_flashcards_term/{}'.format(json_object['term']))
        if 'term-secondary' in json_object:
            parents.add('proso_flashcards_term/{}'.format(json_object['term-secondary']))
    # Any language common to all processed objects works for translation.
    lang = lang_intersect.pop()
    translated = Item.objects.translate_identifiers(
        flatten(parent_subgraph.values()), lang)
    Item.objects.override_parent_subgraph({
        item: [translated[parent] for parent in parents]
        for item, parents in parent_subgraph.items()
    })
def context_flashcards(request, json_list, nested):
    """Attach the item ids of all flashcards belonging to a single context."""
    # Only a lone, top-level context is enriched.
    if nested or len(json_list) > 1:
        return
    item_ids = [context['item_id'] for context in json_list]
    leave_items = Item.objects.get_leaves(item_ids)
    translated = Item.objects.translate_item_ids(
        flatten(leave_items.values()), json_list[0]['lang'])
    for context in json_list:
        context['flashcards'] = [translated[leaf] for leaf in leave_items[context['item_id']]]
def handle_dry(self, options):
    """Replay all stored answers through the predictive model without
    persisting environment changes and write a model-quality report.

    Args:
        options (dict): command options; uses 'initial', 'config_name',
            'batch_size' and 'limit'

    Side effects:
        writes <DATA_DIR>/recompute_model_report_<model>.json and renders
        brier graphs for the computed report.
    """
    info = self.load_environment_info(options['initial'], options['config_name'], True)
    # BUG FIX: the original constructed an InMemoryEnvironment() and then
    # immediately overwrote the variable with self.load_environment(info);
    # the dead assignment has been removed.  If a dry run was *meant* to use
    # a purely in-memory environment, load_environment is the place to fix.
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # Parents are needed by the model as well, so prefetch them too.
    items += list(set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    with closing(connection.cursor()) as cursor:
        cursor.execute('SELECT COUNT(*) FROM proso_models_answer')
        answers_total = cursor.fetchone()[0]
        if options['limit'] is not None:
            answers_total = min(answers_total, options['limit'])
        print('total:', answers_total)
        processed = 0
        prediction = numpy.empty(answers_total)
        correct = numpy.empty(answers_total)
        # Page through the answers in id order; OFFSET advances with the
        # number of already processed rows.
        while processed < answers_total:
            cursor.execute(
                '''
                SELECT id, user_id, item_id, item_asked_id, item_answered_id,
                    time, response_time, guess
                FROM proso_models_answer
                ORDER BY id
                OFFSET %s LIMIT %s
                ''', [processed, options['batch_size']])
            for (answer_id, user, item, asked, answered, time, response_time, guess) in cursor:
                correct[processed] = asked == answered
                prediction[processed] = predictive_model.predict_and_update(
                    environment, user, item, asked == answered, time,
                    item_answered=answered, item_asked=asked, guess=guess,
                    answer_id=answer_id, response_time=response_time,
                )
                environment.process_answer(user, item, asked, answered, time,
                                           answer_id, response_time, guess)
                processed += 1
                if processed >= answers_total:
                    break
            print('processed:', processed)
    filename = settings.DATA_DIR + '/recompute_model_report_{}.json'.format(predictive_model.__class__.__name__)
    model_report = report(prediction, correct)
    with open(filename, 'w') as outfile:
        json.dump(model_report, outfile)
    print('Saving report to:', filename)
    brier_graphs(model_report['brier'], predictive_model)
def get_option_for_flashcards(self, flashcards_with_question_types):
    """Compute (and cache) distractor options for the given flashcards.

    Options are flashcards sharing the asked flashcard's context and type;
    when secondary terms are involved, duplicates and conflicting answers
    are filtered out.

    Args:
        flashcards_with_question_types: iterable of (flashcard json dict,
            question type) pairs

    Returns:
        dict: flashcard item id -> list of option item ids

    NOTE(review): the cache lookup below tests the key
    (item_id, question_type), but the cache is later updated with plain
    item_id keys, so the cached entries can never satisfy the lookup --
    looks like a cache-key mismatch; confirm the intended key.
    """
    question_types = {fc['id']: question_type for fc, question_type in flashcards_with_question_types}
    opt_set_cache = cache.get('flashcard_construction__context_option_set', {})
    to_find = [fc for (fc, question_type) in flashcards_with_question_types
               if (fc['item_id'], question_type) not in opt_set_cache]
    if len(to_find) > 0:
        context_ids = {self.get_context_id(flashcard) for flashcard in to_find}
        # Item ids of all categories marking a flashcard type.
        types_all_item_ids = set([c.item_id for c in Category.objects.filter(type='flashcard_type')])
        flashcard_item_ids = set([flashcard['item_id'] for flashcard in to_find])
        reachable_parents = Item.objects.get_reachable_parents(flashcard_item_ids, language=to_find[0]['lang'])
        # For every flashcard, the subset of its parents that are type categories.
        flashcard_types = {item_id: set(reachable_parents.get(item_id, [])) & types_all_item_ids
                           for item_id in flashcard_item_ids}
        context_item_ids = dict(Context.objects.filter(pk__in=context_ids).values_list('id', 'item_id'))
        secondary_terms = dict(Flashcard.objects.all().values_list('item_id', 'term_secondary_id'))
        # Candidate options: leaves common to the flashcard's context and all
        # of its type categories, restricted to flashcards that agree with the
        # asked flashcard on having (or not having) a secondary term.
        found = {
            flashcard['item_id']: [i for i in reduce(
                lambda xs, ys: set(xs) & set(ys),
                Item.objects.get_leaves(
                    {context_item_ids[self.get_context_id(flashcard)]} | flashcard_types[flashcard['item_id']],
                    language=flashcard['lang']).values()
            ) if (secondary_terms.get(i) is not None) == ('term_secondary' in flashcard)]
            for flashcard in to_find
        }
        if any(['term_secondary' in flashcard for flashcard in to_find]):
            # exclude options:
            #  1) with duplicate term/term_secondary
            #  2) with the same question but different answer
            translated = Item.objects.translate_item_ids(set(flatten(found.values())), language=to_find[0]['lang'])
            fc_dict = {flashcard['item_id']: flashcard for flashcard in to_find}
            found_translated = {item_id: [translated[opt_id] for opt_id in options]
                                for item_id, options in found.items()}
            found = {}
            for fc_item_id, options in found_translated.items():
                fc = fc_dict[fc_item_id]
                # Direction of the question decides which side must be unique.
                if question_types[fc['id']] == FlashcardAnswer.FROM_TERM_TO_TERM_SECONDARY:
                    key_to = 'term_secondary'
                    key_from = 'term'
                elif question_types[fc['id']] == FlashcardAnswer.FROM_TERM_SECONDARY_TO_TERM:
                    key_to = 'term'
                    key_from = 'term_secondary'
                else:
                    found[fc['item_id']] = [opt['item_id'] for opt in options]
                    continue
                options_by_keys = {}
                # Sorted iteration makes the surviving duplicate deterministic.
                for opt in sorted(options, key=lambda o: o['identifier']):
                    if self.get_context_id(fc) == opt['context_id'] and fc[key_from]['identifier'] == opt[key_from]['identifier']:
                        # Same question as the asked flashcard -> would leak
                        # a second correct answer; skip.
                        continue
                    options_by_keys[opt[key_to]['item_id']] = opt
                if fc[key_to]['item_id'] in options_by_keys:
                    # Never offer the correct answer as a distractor.
                    del options_by_keys[fc[key_to]['item_id']]
                found[fc['item_id']] = [opt['item_id'] for opt in options_by_keys.values()]
        # trying to decrease probability of race condition
        opt_set_cache = cache.get('flashcard_construction__context_option_set', {})
        opt_set_cache.update(found)
        cache.set('flashcard_construction__context_option_set', opt_set_cache)
    return {fc['item_id']: opt_set_cache[fc['item_id']] for fc, _ in flashcards_with_question_types}
def context_flashcards(request, json_list, nested):
    """Enrich a context JSON object with the item ids of its flashcards.

    Runs only for a single, top-level context; nested contexts or lists of
    several contexts are left untouched.
    """
    if nested or len(json_list) > 1:
        return
    # Leaves of a context item are its flashcards -- TODO confirm.
    leave_items = Item.objects.get_leaves([c['item_id'] for c in json_list])
    translated = Item.objects.translate_item_ids(flatten(leave_items.values()),
                                                 json_list[0]['lang'])
    for context in json_list:
        context['flashcards'] = [
            translated[i] for i in leave_items[context['item_id']]
        ]
def handle(self, *args, **options):
    """Dump the children graph of the given root identifiers as JSON."""
    if not options['roots']:
        raise Exception('At least one root has to be specified.')
    lang = options['lang']
    translated_roots = Item.objects.translate_identifiers(options['roots'], lang)
    graph = Item.objects.get_children_graph(list(translated_roots.values()), language=lang)
    # Map every item id appearing in the graph to "object_type/identifier".
    item_names = Item.objects.translate_item_ids(
        flatten(graph.values()), language=lang, is_nested=True)
    translated_items = {}
    for item_id, translated in item_names.items():
        translated_items[item_id] = '{}/{}'.format(translated['object_type'], translated['identifier'])
    translated_graph = {
        translated_items.get(parent): [translated_items.get(child) for child in children]
        for parent, children in graph.items()
    }
    print(json.dumps(translated_graph))
def recalculate_concepts(self, concepts, lang=None):
    """
    Recalculated given concepts for given users

    Args:
        concepts (dict): user id (int -> set of concepts to recalculate)
        lang(Optional[str]): language used to get items in all concepts
            (cached). Defaults to None, in that case are get items only
            in used concepts
    """
    if len(concepts) == 0:
        return
    if lang is None:
        items = Concept.objects.get_concept_item_mapping(concepts=Concept.objects.filter(pk__in=set(flatten(concepts.values()))))
    else:
        items = Concept.objects.get_concept_item_mapping(lang=lang)
    environment = get_environment()
    mastery_threshold = get_mastery_trashold()
    # NOTE: the loop variable deliberately shadows the 'concepts' parameter;
    # from here on it is the per-user set of concept ids.
    for user, concepts in concepts.items():
        all_items = list(set(flatten([items[c] for c in concepts])))
        answer_counts = dict(list(zip(all_items, environment.number_of_answers_more_items(all_items, user))))
        correct_answer_counts = dict(list(zip(all_items, environment.number_of_correct_answers_more_items(all_items, user))))
        predictions = dict(list(zip(all_items, get_predictive_model().predict_more_items(environment, user, all_items, time=None))))
        new_user_stats = []
        stats_to_delete_condition = Q()
        for concept in concepts:
            answer_aggregates = Answer.objects.filter(user=user, item__in=items[concept]).aggregate(
                time_spent=Sum("response_time"),
                sessions=Count("session", True),
                time_first=Min("time"),
                time_last=Max("time"),
            )
            # NOTE(review): the aggregates are None when the user has no
            # answers for the concept ('/ 1000' and '.timestamp()' would
            # fail) -- presumably recalculation is only triggered after an
            # answer exists; verify against callers.
            stats = {
                "answer_count": sum(answer_counts[i] for i in items[concept]),
                "correct_answer_count": sum(correct_answer_counts[i] for i in items[concept]),
                "item_count": len(items[concept]),
                "practiced_items_count": sum([answer_counts[i] > 0 for i in items[concept]]),
                "mastered_items_count": sum([predictions[i] >= mastery_threshold for i in items[concept]]),
                "prediction": sum([predictions[i] for i in items[concept]]) / len(items[concept]),
                "time_spent": answer_aggregates["time_spent"] / 1000,
                "session_count": answer_aggregates["sessions"],
                "time_first": answer_aggregates["time_first"].timestamp(),
                "time_last": answer_aggregates["time_last"].timestamp(),
            }
            stats_to_delete_condition |= Q(user=user, concept=concept)
            for stat_name, value in stats.items():
                new_user_stats.append(UserStat(user_id=user, concept_id=concept, stat=stat_name, value=value))
        # Replace the user's stats atomically-ish: delete old rows, bulk insert.
        self.filter(stats_to_delete_condition).delete()
        self.bulk_create(new_user_stats)
def enrich_json_objects_by_object_type(request, value):
    """
    Take the given value and start enrichment by object_type.

    Args:
        request (django.http.request.HttpRequest): request which is currently processed
        value (dict|list|django.db.models.Model): in case of
            django.db.models.Model object (or list of these objects),
            to_json method is invoked

    Returns:
        dict|list
    """
    time_start_globally = time()
    # Normalize the input to plain JSON-like data.
    if isinstance(value, list):
        json = [x.to_json() if hasattr(x, "to_json") else x for x in value]
    else:
        if isinstance(value, dict):
            json = value
        else:
            json = value.to_json()
    objects, nested = _collect_json_objects(json, by='object_type')
    for enricher_info in _get_OBJECT_TYPE_ENRICHER_ORDER():
        if len(enricher_info['object_types']) > 0:
            # The enricher is restricted to particular object types.
            enricher_objects = flatten([objects.get(object_type, []) for object_type in enricher_info['object_types']])
            enricher_nested = any([nested.get(object_type, False) for object_type in enricher_info['object_types']])
        else:
            # No restriction: the enricher processes all collected objects.
            enricher_objects = flatten(objects.values())
            enricher_nested = any(nested.values())
        if len(enricher_objects) > 0:
            time_start = time()
            enricher_info['enricher'](request, enricher_objects, enricher_nested)
            LOGGER.debug('enrichment "{}" took {} seconds'.format(enricher_info['enricher_name'], time() - time_start))
            if not enricher_info['pure']:
                # if the enricher modified object types we must collect objects
                # again
                objects, nested = _collect_json_objects(json, by='object_type')
    LOGGER.debug('The whole enrichment of json objects by their object_type took {} seconds.'.format(time() - time_start_globally))
    return json
def number_of_correct_answers(request, json_list, nested):
    """Annotate every JSON object with its number of correctly answered leaves."""
    # Stats are computed only on explicit request.
    if 'stats' not in request.GET:
        return
    item_ids = [obj['item_id'] for obj in json_list]
    user = get_user_id(request)
    leaves = models.Item.objects.get_leaves(item_ids, language=get_language(request))
    all_leaves = set(flatten(leaves.values()))
    correct_counts = _environment(request).number_of_correct_answers_more_items(
        user=user, items=all_leaves)
    for obj in json_list:
        num = sum(correct_counts[leaf] for leaf in leaves[obj['item_id']])
        obj['number_of_correct_answers'] = num
        obj['practiced_correctly'] = num > 0
    return json_list
def avg_prediction(request, json_list, nested):
    """Annotate every JSON object with its average prediction and mastery count."""
    if 'stats' not in request.GET:
        return
    item_ids = [obj['item_id'] for obj in json_list]
    leaves = models.Item.objects.get_leaves(item_ids, language=get_language(request))
    all_leaves = list(set(flatten(leaves.values())))
    user = get_user_id(request)
    time = models.get_time_for_knowledge_overview(request)
    predicted = _predictive_model().predict_more_items(
        _environment(request), user, all_leaves, time)
    predictions = dict(zip(all_leaves, predicted))
    mastery_threshold = get_mastery_trashold()
    for obj in json_list:
        leaf_predictions = [predictions[leaf] for leaf in leaves[obj['item_id']]]
        # NOTE: the key 'avg_predicton' (sic) is kept byte-identical --
        # renaming it would be a breaking change for API consumers.
        obj['avg_predicton'] = numpy.mean(leaf_predictions)
        obj['mastered'] = sum(p > mastery_threshold for p in leaf_predictions)
def filter_all_reachable_leaves_many(self, identifier_filters, language):
    """
    Provides the same functionality as
    .. py:method:: ItemManager.filter_all_reachable_leaves(), but for more
    filters in the same time.

    A filter is a list of inner filters; every inner filter is a list of
    identifiers, where a '-' prefix excludes the identifier's leaves.

    Args:
        identifier_filters: list of identifier filters
        language (str): language used for further filtering (some objects
            for different languages share the same item)

    Returns:
        list: list of list of item ids

    Raises:
        Exception: for an empty inner filter or an inner filter containing
            only '-'-prefixed identifiers
    """
    # Normalize into a local list: a flat one-element filter ['x'] becomes
    # [['x']].  BUG FIX: the original mutated the caller's sequence in place
    # (crashing on tuples) and ran the '-'-only guard on the *pre*-normalized
    # value, so a flat filter like ['-x'] iterated over the characters of the
    # string and the guard never fired.
    normalized = []
    for identifier_filter in identifier_filters:
        if len(identifier_filter) == 1 and not isinstance(identifier_filter[0], list):
            identifier_filter = [identifier_filter]
        if any(len(xs) == 1 and xs[0].startswith('-') for xs in identifier_filter):
            raise Exception('Filter containing only one identifier with "-" prefix is not allowed.')
        normalized.append(identifier_filter)
    item_identifiers = [
        identifier[1:] if identifier.startswith('-') else identifier
        for identifier_filter in normalized
        for identifier in set(flatten(identifier_filter))
    ]
    translated = self.translate_identifiers(item_identifiers, language)
    leaves = self.get_leaves(list(translated.values()))
    result = []
    for identifier_filter in normalized:
        filter_result = set()
        for inner_filter in identifier_filter:
            inner_result = None
            inner_neg_result = set()
            if len(inner_filter) == 0:
                raise Exception('Empty nested filters are not allowed.')
            for identifier in inner_filter:
                if identifier.startswith('-'):
                    inner_neg_result |= set(leaves[translated[identifier[1:]]])
                elif inner_result is None:
                    inner_result = set(leaves[translated[identifier]])
                else:
                    inner_result &= set(leaves[translated[identifier]])
            if inner_result is None:
                # BUG FIX: an inner filter with only negative identifiers used
                # to crash below with "TypeError: unsupported operand ... None".
                raise Exception('Filter containing only one identifier with "-" prefix is not allowed.')
            filter_result |= inner_result - inner_neg_result
        result.append(sorted(list(filter_result)))
    return result
def handle(self, *args, **options):
    """Print the children graph of the given root identifiers as JSON.

    Item ids are translated to human-readable "object_type/identifier"
    strings before dumping.
    """
    if len(options['roots']) == 0:
        raise Exception('At least one root has to be specified.')
    translated_roots = Item.objects.translate_identifiers(
        options['roots'], options['lang'])
    graph = Item.objects.get_children_graph(list(
        translated_roots.values()), language=options['lang'])
    # item id -> "object_type/identifier" for every item in the graph
    translated_items = {
        i: '{}/{}'.format(o['object_type'], o['identifier'])
        for i, o in Item.objects.translate_item_ids(
            flatten(graph.values()), language=options['lang'],
            is_nested=True).items()
    }
    translated_graph = {
        translated_items.get(u): [translated_items.get(v) for v in vs]
        for u, vs in graph.items()
    }
    print(json.dumps(translated_graph))
def _get_OBJECT_TYPE_ENRICHER_ORDER():
    """Compute (lazily, once) a dependency-respecting order of the
    registered object-type enrichers.

    Performs a priority-driven depth-first topological sort of the enricher
    dependency graph, caching the result in _OBJECT_TYPE_ENRICHER_ORDER
    under _OBJECT_TYPE_ENRICHERS_LOCK.

    Returns:
        list: enricher info dicts, dependencies before dependents

    Raises:
        Exception: when the dependency graph contains a cycle
    """
    with _OBJECT_TYPE_ENRICHERS_LOCK:
        global _OBJECT_TYPE_ENRICHER_ORDER
        if _OBJECT_TYPE_ENRICHER_ORDER is None:
            visited = set()
            stack = set()
            order = []
            enrichers = _OBJECT_TYPE_ENRICHERS
            referenced = set(
                flatten([
                    enricher_info['dependencies']
                    for enricher_info in enrichers.values()
                ]))
            # Start the traversal from enrichers nobody depends on.
            roots = set(enrichers.keys()) - referenced

            def _visit(enricher_info):
                name = enricher_info['enricher_name']
                # BUG FIX: the stack membership test must come first and a
                # node may only join 'visited' once fully processed.  The
                # original added nodes to 'visited' before recursing and
                # tested 'visited' first, so cycles silently short-circuited
                # instead of raising.
                if name in stack:
                    raise Exception(
                        'There is a cycle in dependencies of enrichers.')
                if name in visited:
                    return
                stack.add(name)
                for enricher_dep in sorted(
                        enricher_info['dependencies'],
                        key=lambda dep: enrichers[dep]['priority']):
                    _visit(enrichers[enricher_dep])
                stack.remove(name)
                visited.add(name)
                order.append(enricher_info)

            for enricher_name, enricher_info in sorted(
                    enrichers.items(), key=lambda x: x[1]['priority']):
                if enricher_name not in roots:
                    continue
                _visit(enricher_info)
            # The original re-sorted 'order' by its own enumeration index (a
            # no-op) -- dropped.  Enrichers reachable only via a cycle (no
            # root) are still silently skipped, as before.
            _OBJECT_TYPE_ENRICHER_ORDER = order
    return _OBJECT_TYPE_ENRICHER_ORDER
def _load_item_relations(self, data, db_objects, categories_json_key):
    """Build the parent subgraph (categories, context, terms) and persist it.

    Args:
        data: list of loaded JSON objects keyed by "id"; may reference a
            'context', 'term' and 'term-secondary'
        db_objects: mapping of (identifier, language) -> saved object with an
            ``item_id`` attribute -- presumably produced by the loader;
            verify against caller
        categories_json_key (str): JSON key listing parent category identifiers
    """
    # Collapse the (identifier, language) keyed mapping into identifier ->
    # (object, list of languages it was seen in).
    db_objects_processed = {}
    for (identifier, lang), db_object in db_objects.items():
        _, found_langs = db_objects_processed.get(identifier, (None, []))
        db_objects_processed[identifier] = db_object, found_langs + [lang]
    print("\nFilling item types")
    call_command('fill_item_types')
    print("\nBuilding dependencies")
    parent_subgraph = {}
    lang_intersect = None
    for json_object in progress.bar(data, every=max(1, len(data) / 100)):
        db_object, langs = db_objects_processed[json_object["id"]]
        # The language is not important here.
        lang_intersect = set(
            langs
        ) if lang_intersect is None else lang_intersect & set(langs)
        parent_items = parent_subgraph.get(db_object.item_id, set())
        for parent in json_object.get(categories_json_key, []):
            parent_items.add('proso_flashcards_category/{}'.format(parent))
        if 'context' in json_object:
            parent_items.add('proso_flashcards_context/{}'.format(
                json_object['context']))
        if 'term' in json_object:
            parent_items.add('proso_flashcards_term/{}'.format(
                json_object['term']))
        if 'term-secondary' in json_object:
            parent_items.add('proso_flashcards_term/{}'.format(
                json_object['term-secondary']))
        parent_subgraph[db_object.item_id] = parent_items
    # NOTE(review): raises if ``data`` is empty or if the objects share no
    # common language -- confirm callers guarantee both.
    lang = lang_intersect.pop()
    translated = Item.objects.translate_identifiers(
        flatten(parent_subgraph.values()), lang)
    Item.objects.override_parent_subgraph({
        item: [translated[parent] for parent in parents]
        for item, parents in parent_subgraph.items()
    })
def _load_item_relations(self, data, db_objects, categories_json_key):
    """Build the category parent subgraph of loaded objects and persist it."""
    print("\nFilling item types")
    call_command('fill_item_types')
    print("\nBuilding dependencies")
    parent_subgraph = {}
    common_langs = None
    for json_object in progress.bar(data, every=max(1, len(data) / 100)):
        # The language is not important here.
        langs = [key[-2:] for key in json_object.keys() if re.match(r'^name-\w\w$', key)]
        common_langs = set(langs) if common_langs is None else common_langs & set(langs)
        db_object = db_objects[json_object["id"], langs[0]]
        parents = parent_subgraph.setdefault(db_object.item_id, set())
        for parent in json_object.get(categories_json_key, []):
            parents.add('proso_flashcards_category/{}'.format(parent))
    # Any language shared by all processed objects works for translation.
    lang = common_langs.pop()
    translated = Item.objects.translate_identifiers(
        flatten(parent_subgraph.values()), lang
    )
    Item.objects.override_parent_subgraph({
        item: [translated[parent] for parent in parents]
        for item, parents in parent_subgraph.items()
    })
def __search(item_ids):
    # Expand one frontier through the closed-over ``children`` mapping;
    # items without children are recorded in ``leaves`` and not expanded
    # further.
    expanded = set(flatten([children.get(item_id, []) for item_id in item_ids]))
    discovered_leaves = {item_id for item_id in expanded if item_id not in children.keys()}
    leaves.update(discovered_leaves)
    return expanded - discovered_leaves
def recompute(self, info, options):
    """Replay one batch of answers through the predictive model and flush
    the environment; optionally finish by activating the new environment.

    Args:
        info: environment info record; ``load_progress`` tracks how many
            answers were already replayed
        options (dict): uses 'batch_size' and 'finish'
    """
    print(' -- preparing phase')
    timer('recompute_prepare')
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # The model also needs the parents of the answered items prefetched.
    items += list(set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
    timer('recompute_model')
    print(' -- model phase')
    with closing(connection.cursor()) as cursor:
        # Continue from where the previous run stopped (info.load_progress).
        cursor.execute(
            '''
            SELECT id, user_id, item_id, item_asked_id, item_answered_id,
                time, response_time, guess
            FROM proso_models_answer
            ORDER BY id
            OFFSET %s LIMIT %s
            ''', [info.load_progress, options['batch_size']])
        progress_bar = progress.bar(cursor, every=max(1, cursor.rowcount // 100), expected_size=cursor.rowcount)
        info.load_progress += cursor.rowcount
        for (answer_id, user, item, asked, answered, time, response_time, guess) in progress_bar:
            predictive_model.predict_and_update(
                environment, user, item, asked == answered, time,
                item_answered=answered, item_asked=asked, guess=guess,
                answer_id=answer_id, response_time=response_time,
            )
            environment.process_answer(user, item, asked, answered, time, answer_id, response_time, guess)
    print(' -- model phase, time:', timer('recompute_model'), 'seconds')
    timer('recompute_flush')
    print(' -- flushing phase')
    environment.flush(clean=options['finish'])
    print(' -- flushing phase, time:', timer('recompute_flush'), 'seconds, total number of answers:', info.load_progress)
    if options['finish']:
        timer('recompute_finish')
        print(' -- finishing phase')
        # Deactivate the previously active environment (if any) and drop the
        # cached environment info so the new one takes effect.
        try:
            previous_info = EnvironmentInfo.objects.get(status=EnvironmentInfo.STATUS_ACTIVE)
            previous_info.status = EnvironmentInfo.STATUS_DISABLED
            previous_info.save()
            cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
        except EnvironmentInfo.DoesNotExist:
            pass
        info.status = EnvironmentInfo.STATUS_ACTIVE
        print(' -- finishing phase, time:', timer('recompute_finish'), 'seconds')
    info.save()
def user_stats(request):
    """
    Get user statistics for selected groups of items

    time: time in format '%Y-%m-%d_%H:%M:%S' used for practicing
    user: identifier of the user (only for stuff users)
    username: username of user (only for users with public profile)
    filters: -- use this or body json as in BODY
    mastered: use model to compute number of mastered items - can be slowed
    language: language of the items

    BODY
        json in following format:
        {
            "#identifier": []  -- custom identifier (str) and filter
            ...
        }
    """
    timer('user_stats')
    response = {}
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None:
        # No filters given: render only the help page.
        return render_json(request, {}, template='models_user_stats.html', help_text=user_stats.__doc__)
    environment = get_environment()
    if is_time_overridden(request):
        environment.shift_time(get_time(request))
    user_id = get_user_id(request)
    language = get_language(request)
    filter_names, filter_filters = list(zip(*sorted(data.items())))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(filter_filters, language)
    all_leaves = flatten(reachable_leaves)
    answers = dict(list(zip(all_leaves, environment.number_of_answers_more_items(all_leaves, user_id))))
    correct_answers = dict(list(zip(all_leaves, environment.number_of_correct_answers_more_items(all_leaves, user_id))))
    if request.GET.get("mastered"):
        timer('user_stats_mastered')
        mastery_threshold = get_mastery_trashold()
        predictions = get_predictive_model().predict_more_items(environment, user_id, all_leaves, get_time(request))
        mastered = dict(list(zip(all_leaves, [p >= mastery_threshold for p in predictions])))
        LOGGER.debug("user_stats - getting predictions for flashcards took %s seconds", (timer('user_stats_mastered')))
    for identifier, items in zip(filter_names, reachable_leaves):
        if len(items) == 0:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_flashcards": 0,
            }
        else:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_flashcards": len(items),
                "number_of_practiced_flashcards": sum(answers[i] > 0 for i in items),
                "number_of_answers": sum(answers[i] for i in items),
                "number_of_correct_answers": sum(correct_answers[i] for i in items),
            }
            if request.GET.get("mastered"):
                response[identifier]["number_of_mastered_flashcards"] = sum(mastered[i] for i in items)
    return render_json(request, response, template='models_user_stats.html', help_text=user_stats.__doc__)
def recalculate_concepts(self, concepts, lang=None):
    """
    Recalculated given concepts for given users

    Args:
        concepts (dict): user id (int -> set of concepts to recalculate)
        lang(Optional[str]): language used to get items in all concepts
            (cached). Defaults to None, in that case are get items only
            in used concepts
    """
    if len(concepts) == 0:
        return
    if lang is None:
        items = Concept.objects.get_concept_item_mapping(
            concepts=Concept.objects.filter(
                pk__in=set(flatten(concepts.values()))))
    else:
        items = Concept.objects.get_concept_item_mapping(lang=lang)
    environment = get_environment()
    mastery_threshold = get_mastery_trashold()
    # NOTE: the loop variable deliberately shadows the 'concepts' parameter;
    # from here on it is the per-user set of concept ids.
    for user, concepts in concepts.items():
        all_items = list(set(flatten([items[c] for c in concepts])))
        answer_counts = dict(
            list(
                zip(
                    all_items,
                    environment.number_of_answers_more_items(
                        all_items, user))))
        correct_answer_counts = dict(
            list(
                zip(
                    all_items,
                    environment.number_of_correct_answers_more_items(
                        all_items, user))))
        predictions = dict(
            list(
                zip(
                    all_items,
                    get_predictive_model().predict_more_items(environment,
                                                              user,
                                                              all_items,
                                                              time=None))))
        new_user_stats = []
        stats_to_delete_condition = Q()
        for concept in concepts:
            answer_aggregates = Answer.objects.filter(
                user=user, item__in=items[concept]).aggregate(
                    time_spent=Sum("response_time"),
                    sessions=Count("session", True),
                    time_first=Min("time"),
                    time_last=Max("time"),
                )
            # NOTE(review): the aggregates are None when the user has no
            # answers for the concept ('/ 1000' and '.timestamp()' would
            # fail) -- presumably recalculation is only triggered after an
            # answer exists; verify against callers.
            stats = {
                "answer_count":
                sum(answer_counts[i] for i in items[concept]),
                "correct_answer_count":
                sum(correct_answer_counts[i] for i in items[concept]),
                "item_count":
                len(items[concept]),
                "practiced_items_count":
                sum([answer_counts[i] > 0 for i in items[concept]]),
                "mastered_items_count":
                sum([
                    predictions[i] >= mastery_threshold
                    for i in items[concept]
                ]),
                "prediction":
                sum([predictions[i] for i in items[concept]]) / len(items[concept]),
                "time_spent":
                answer_aggregates["time_spent"] / 1000,
                "session_count":
                answer_aggregates["sessions"],
                "time_first":
                answer_aggregates["time_first"].timestamp(),
                "time_last":
                answer_aggregates["time_last"].timestamp(),
            }
            stats_to_delete_condition |= Q(user=user, concept=concept)
            for stat_name, value in stats.items():
                new_user_stats.append(
                    UserStat(user_id=user,
                             concept_id=concept,
                             stat=stat_name,
                             value=value))
        # Replace the user's stats: delete the old rows, bulk insert new ones.
        self.filter(stats_to_delete_condition).delete()
        self.bulk_create(new_user_stats)
def user_stats(request):
    """
    Get user statistics for selected groups of items

    time:
      time in format '%Y-%m-%d_%H:%M:%S' used for practicing
    user:
      identifier of the user (only for staff users)
    username:
      username of user (only for users with public profile)
    filters:                      -- use this or body json as in BODY
      mastered:
        use model to compute number of mastered items - can be slow
      language:
        language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    timer('user_stats')
    response = {}
    # filters may arrive either in the POST body or as a GET parameter;
    # GET takes precedence when both are present (original behavior kept)
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None:
        # no filters given -> render the help page
        return render_json(request, {},
                           template='models_user_stats.html',
                           help_text=user_stats.__doc__)
    environment = get_environment()
    if is_time_overridden(request):
        environment.shift_time(get_time(request))
    user_id = get_user_id(request)
    language = get_language(request)
    if len(data) == 0:
        # guard: zip(*sorted({}.items())) below would raise ValueError on
        # unpacking; an empty filter set simply yields an empty response
        return render_json(request, response,
                           template='models_user_stats.html',
                           help_text=user_stats.__doc__)
    filter_names, filter_filters = zip(*sorted(data.items()))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(
        filter_filters, language)
    # union of all leaf items over all filters, fetched in one batch
    all_leaves = sorted(set(flatten(reachable_leaves)))
    answers = environment.number_of_answers_more_items(all_leaves, user_id)
    correct_answers = environment.number_of_correct_answers_more_items(
        all_leaves, user_id)
    if request.GET.get("mastered"):
        timer('user_stats_mastered')
        mastery_threshold = get_mastery_trashold()
        predictions = Item.objects.predict_for_overview(
            environment, user_id, all_leaves)
        mastered = dict(zip(
            all_leaves, [p >= mastery_threshold for p in predictions]))
        LOGGER.debug(
            "user_stats - getting predictions for items took %s seconds",
            (timer('user_stats_mastered')))
    for identifier, items in zip(filter_names, reachable_leaves):
        if len(items) == 0:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_items": 0,
            }
        else:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_items": len(items),
                "number_of_practiced_items": sum(
                    answers[i] > 0 for i in items),
                "number_of_answers": sum(answers[i] for i in items),
                "number_of_correct_answers": sum(
                    correct_answers[i] for i in items),
            }
            if request.GET.get("mastered"):
                response[identifier]["number_of_mastered_items"] = sum(
                    mastered[i] for i in items)
    return render_json(request, response,
                       template='models_user_stats.html',
                       help_text=user_stats.__doc__)
def recompute(self, info, options):
    # Replay one batch of answers through the predictive model and persist the
    # resulting environment state. Three phases (prepare / model / flush), each
    # timed and printed; a fourth "finishing" phase activates this environment
    # info and disables the previously active one when options['finish'] is set.
    print(' -- preparing phase')
    timer('recompute_prepare')
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # also prefetch every reachable parent item, not only the leaves
    items += list(
        set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
    timer('recompute_model')
    print(' -- model phase')
    with closing(connection.cursor()) as cursor:
        # info.load_progress is the number of answers already processed in
        # previous runs; OFFSET resumes exactly where the last batch stopped
        cursor.execute(
            '''
            SELECT id, user_id, item_id, item_asked_id, item_answered_id, time, response_time, guess
            FROM proso_models_answer
            ORDER BY id
            OFFSET %s LIMIT %s
            ''', [info.load_progress, options['batch_size']])
        progress_bar = progress.bar(cursor,
                                    every=max(1, cursor.rowcount // 100),
                                    expected_size=cursor.rowcount)
        info.load_progress += cursor.rowcount
        for (answer_id, user, item, asked, answered, time, response_time,
             guess) in progress_bar:
            # asked == answered encodes whether the answer was correct
            predictive_model.predict_and_update(
                environment,
                user,
                item,
                asked == answered,
                time,
                item_answered=answered,
                item_asked=asked,
                guess=guess,
                answer_id=answer_id,
                response_time=response_time,
            )
            environment.process_answer(user, item, asked, answered, time,
                                       answer_id, response_time, guess)
    print(' -- model phase, time:', timer('recompute_model'), 'seconds')
    timer('recompute_flush')
    print(' -- flushing phase')
    # clean only when finishing: intermediate batches keep working data around
    environment.flush(clean=options['finish'])
    print(' -- flushing phase, time:', timer('recompute_flush'),
          'seconds, total number of answers:', info.load_progress)
    if options['finish']:
        timer('recompute_finish')
        print(' -- finishing phase')
        # swap the active environment info: disable the previous one (if any)
        # and drop its cache entry so readers pick up the new one
        try:
            previous_info = EnvironmentInfo.objects.get(
                status=EnvironmentInfo.STATUS_ACTIVE)
            previous_info.status = EnvironmentInfo.STATUS_DISABLED
            previous_info.save()
            cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
        except EnvironmentInfo.DoesNotExist:
            # no previously active environment -- nothing to disable
            pass
        info.status = EnvironmentInfo.STATUS_ACTIVE
        print(' -- finishing phase, time:', timer('recompute_finish'),
              'seconds')
    # NOTE(review): persists load_progress (and status when finishing) after
    # every batch -- placement at function level reconstructed from a collapsed
    # source line; confirm against the original indentation.
    info.save()
def handle_dry(self, options):
    """
    Dry run: replay all stored answers through the predictive model, collect
    the model's predictions together with the actual correctness, and write an
    evaluation report (Brier score etc.) to a JSON file instead of persisting
    any recomputed state.

    Args:
        options (dict): command options; uses 'initial', 'config_name',
            'batch_size' and 'limit' (optional cap on processed answers).
    """
    info = self.load_environment_info(options['initial'],
                                      options['config_name'], True)
    # NOTE(review): the original constructed a throwaway
    # InMemoryEnvironment(audit_enabled=False) here and immediately overwrote
    # it with self.load_environment(info); the dead assignment was removed.
    # If the dry run was meant to run against the in-memory environment,
    # restore that deliberately instead.
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    items += list(
        set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    with closing(connection.cursor()) as cursor:
        cursor.execute('SELECT COUNT(*) FROM proso_models_answer')
        answers_total = cursor.fetchone()[0]
        if options['limit'] is not None:
            answers_total = min(answers_total, options['limit'])
        print('total:', answers_total)
        processed = 0
        # pre-sized arrays: one prediction and one correctness flag per answer
        prediction = numpy.empty(answers_total)
        correct = numpy.empty(answers_total)
        while processed < answers_total:
            # page through the answers in id order, batch_size rows at a time
            cursor.execute(
                '''
                SELECT id, user_id, item_id, item_asked_id, item_answered_id, time, response_time, guess
                FROM proso_models_answer
                ORDER BY id
                OFFSET %s LIMIT %s
                ''', [processed, options['batch_size']])
            for (answer_id, user, item, asked, answered, time, response_time,
                 guess) in cursor:
                correct[processed] = asked == answered
                prediction[processed] = predictive_model.predict_and_update(
                    environment,
                    user,
                    item,
                    asked == answered,
                    time,
                    item_answered=answered,
                    item_asked=asked,
                    guess=guess,
                    answer_id=answer_id,
                    response_time=response_time,
                )
                environment.process_answer(user, item, asked, answered, time,
                                           answer_id, response_time, guess)
                processed += 1
                # honor the optional limit even mid-batch
                if processed >= answers_total:
                    break
            print('processed:', processed)
    filename = settings.DATA_DIR + '/recompute_model_report_{}.json'.format(
        predictive_model.__class__.__name__)
    model_report = report(prediction, correct)
    with open(filename, 'w') as outfile:
        json.dump(model_report, outfile)
    print('Saving report to:', filename)
    brier_graphs(model_report['brier'], predictive_model)