def small_concepts(request, skill_identifier):
    """Return prediction and answer-count data for the sub-skills of a skill.

    Responds with ``{'msg': 'no_table'}`` when no table structure is known
    for ``skill_identifier``; otherwise returns the structure from
    SKILL_TABLES together with per-sub-skill name, prediction and answer count.
    """
    if skill_identifier not in SKILL_TABLES:
        return JsonResponse({'msg': 'no_table'})
    parent = get_object_or_404(Skill, identifier=skill_identifier)
    children = Skill.objects.filter(
        item__child_relations__parent=parent.item_id, active=True)
    child_items = [child.item_id for child in children]
    env = get_environment()
    predictions = get_predictive_model().predict_more_items(
        env, request.user.pk, child_items, datetime.now())
    counts = env.read_more_items(
        'answer_count', user=request.user.pk, items=child_items, default=0)
    # children, predictions and child_items are aligned by construction
    data = {
        child.identifier: {
            'name': child.name,
            'prediction': predicted,
            'answer_count': counts[item_id],
        }
        for child, predicted, item_id in zip(children, predictions, child_items)
    }
    return JsonResponse({
        'structure': SKILL_TABLES[skill_identifier],
        'data': data
    })
def recompute(self, info, options):
    """Replay one batch of stored answers through the predictive model.

    Loads the environment for ``info``, streams up to
    ``options['batch_size']`` answers starting at ``info.load_progress``,
    updates both the model and the environment per answer, flushes the
    environment, and (when ``options['finish']``) promotes ``info`` to the
    active environment, disabling the previously active one.
    """
    print(' -- preparing phase')
    timer('recompute_prepare')
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    environment.prefetch(users, items)
    # FIX: build the model from this environment's config (info.to_json()),
    # consistent with handle_dry(), instead of the default configuration.
    predictive_model = get_predictive_model(info.to_json())
    print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
    timer('recompute_model')
    print(' -- model phase')
    with closing(connection.cursor()) as cursor:
        cursor.execute(
            '''
            SELECT
                id, user_id, item_id, item_asked_id, item_answered_id,
                time, response_time, guess
            FROM proso_models_answer
            ORDER BY id
            OFFSET %s LIMIT %s
            ''', [info.load_progress, options['batch_size']])
        # FIX: integer division -- "rowcount / 100" is a float in Python 3,
        # which is not a valid step for progress.bar's ``every`` argument.
        progress_bar = progress.bar(
            cursor,
            every=max(1, cursor.rowcount // 100),
            expected_size=cursor.rowcount)
        info.load_progress += cursor.rowcount
        for (answer_id, user, item, asked, answered, time, response_time, guess) in progress_bar:
            predictive_model.predict_and_update(
                environment, user, item, asked == answered, time,
                item_answered=answered,
                item_asked=asked,
                guess=guess,
                answer_id=answer_id,
                # FIX: forward response_time as well, consistent with
                # handle_dry(), so the model sees the full answer record.
                response_time=response_time,
            )
            environment.process_answer(
                user, item, asked, answered, time, answer_id, response_time, guess)
    print(' -- model phase, time:', timer('recompute_model'), 'seconds')
    timer('recompute_flush')
    print(' -- flushing phase')
    environment.flush(clean=options['finish'])
    print(' -- flushing phase, time:', timer('recompute_flush'),
          'seconds, total number of answers:', info.load_progress)
    if options['finish']:
        timer('recompute_finish')
        print(' -- finishing phase')
        try:
            # Deactivate the previously active environment, if any, and drop
            # its cache entry so the new one is picked up.
            previous_info = EnvironmentInfo.objects.get(
                status=EnvironmentInfo.STATUS_ACTIVE)
            previous_info.status = EnvironmentInfo.STATUS_DISABLED
            previous_info.save()
            cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
        except EnvironmentInfo.DoesNotExist:
            pass
        info.status = EnvironmentInfo.STATUS_ACTIVE
        print(' -- finishing phase, time:', timer('recompute_finish'), 'seconds')
    info.save()
def handle_dry(self, options):
    """Dry-run the predictive model over stored answers and write a report.

    Replays up to ``options['limit']`` answers (all of them when the limit is
    None) in batches of ``options['batch_size']``, recording the model's
    prediction versus actual correctness for each answer, then dumps a
    Brier-score report as JSON into ``settings.DATA_DIR``.
    """
    info = self.load_environment_info(options['initial'], options['config_name'], True)
    # FIX: dropped the dead "environment = InMemoryEnvironment()" assignment
    # that was immediately overwritten by load_environment(info).
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # Prefetch parents too, so predictions over the item hierarchy are warm.
    items += list(set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    with closing(connection.cursor()) as cursor:
        cursor.execute('SELECT COUNT(*) FROM proso_models_answer')
        answers_total = cursor.fetchone()[0]
        if options['limit'] is not None:
            answers_total = min(answers_total, options['limit'])
        print('total:', answers_total)
        processed = 0
        prediction = numpy.empty(answers_total)
        correct = numpy.empty(answers_total)
        while processed < answers_total:
            cursor.execute(
                '''
                SELECT
                    id, user_id, item_id, item_asked_id, item_answered_id,
                    time, response_time, guess
                FROM proso_models_answer
                ORDER BY id
                OFFSET %s LIMIT %s
                ''', [processed, options['batch_size']])
            for (answer_id, user, item, asked, answered, time, response_time, guess) in cursor:
                correct[processed] = asked == answered
                prediction[processed] = predictive_model.predict_and_update(
                    environment, user, item, asked == answered, time,
                    item_answered=answered,
                    item_asked=asked,
                    guess=guess,
                    answer_id=answer_id,
                    response_time=response_time,
                )
                environment.process_answer(
                    user, item, asked, answered, time, answer_id, response_time, guess)
                processed += 1
                if processed >= answers_total:
                    break
            print('processed:', processed)
    filename = settings.DATA_DIR + '/recompute_model_report_{}.json'.format(
        predictive_model.__class__.__name__)
    model_report = report(prediction, correct)
    with open(filename, 'w') as outfile:
        json.dump(model_report, outfile)
    print('Saving report to:', filename)
    brier_graphs(model_report['brier'], predictive_model)
def recalculate_concepts(self, concepts, lang=None):
    """Recalculate stats of the given concepts for the given users.

    Args:
        concepts (dict): user id -> set of concept ids to recalculate
        lang (Optional[str]): language used to fetch items in all concepts
            (cached). Defaults to None; in that case only items of the
            concepts actually used are fetched.
    """
    if len(concepts) == 0:
        return
    if lang is None:
        items = Concept.objects.get_concept_item_mapping(
            concepts=Concept.objects.filter(pk__in=set(flatten(concepts.values()))))
    else:
        items = Concept.objects.get_concept_item_mapping(lang=lang)
    environment = get_environment()
    mastery_threshold = get_mastery_trashold()
    # FIX: loop variable renamed -- the original "for user, concepts in
    # concepts.items()" shadowed the function argument.
    for user, user_concepts in concepts.items():
        all_items = list(set(flatten([items[c] for c in user_concepts])))
        answer_counts = dict(zip(
            all_items, environment.number_of_answers_more_items(all_items, user)))
        correct_answer_counts = dict(zip(
            all_items, environment.number_of_correct_answers_more_items(all_items, user)))
        predictions = dict(zip(
            all_items,
            get_predictive_model().predict_more_items(environment, user, all_items, time=None)))
        new_user_stats = []
        stats_to_delete_condition = Q()
        for concept in user_concepts:
            answer_aggregates = Answer.objects.filter(user=user, item__in=items[concept]).aggregate(
                time_spent=Sum("response_time"),
                sessions=Count("session", True),
                time_first=Min("time"),
                time_last=Max("time"),
            )
            # NOTE(review): the aggregates are None when the user has no
            # answers on the concept's items -- presumably callers only pass
            # practiced concepts; confirm before relying on it.
            stats = {
                "answer_count": sum(answer_counts[i] for i in items[concept]),
                "correct_answer_count": sum(correct_answer_counts[i] for i in items[concept]),
                "item_count": len(items[concept]),
                "practiced_items_count": sum(answer_counts[i] > 0 for i in items[concept]),
                "mastered_items_count": sum(
                    predictions[i] >= mastery_threshold for i in items[concept]),
                "prediction": sum(predictions[i] for i in items[concept]) / len(items[concept]),
                "time_spent": answer_aggregates["time_spent"] / 1000,
                "session_count": answer_aggregates["sessions"],
                "time_first": answer_aggregates["time_first"].timestamp(),
                "time_last": answer_aggregates["time_last"].timestamp(),
            }
            stats_to_delete_condition |= Q(user=user, concept=concept)
            for stat_name, value in stats.items():
                new_user_stats.append(
                    UserStat(user_id=user, concept_id=concept, stat=stat_name, value=value))
        # Replace the user's old stats atomically at the queryset level:
        # delete everything matched, then bulk-insert the fresh rows.
        self.filter(stats_to_delete_condition).delete()
        self.bulk_create(new_user_stats)
def small_concepts(request, skill_identifier):
    """Serve per-sub-skill predictions and answer counts for one skill.

    When SKILL_TABLES has no layout for ``skill_identifier`` the response is
    ``{'msg': 'no_table'}``; otherwise the known table structure is returned
    together with name, prediction and answer count for every active sub-skill.
    """
    if skill_identifier not in SKILL_TABLES:
        return JsonResponse({'msg': 'no_table'})
    skill = get_object_or_404(Skill, identifier=skill_identifier)
    subskills = Skill.objects.filter(
        item__child_relations__parent=skill.item_id, active=True)
    item_ids = [subskill.item_id for subskill in subskills]
    environment = get_environment()
    user_id = request.user.pk
    predictions = get_predictive_model().predict_more_items(
        environment, user_id, item_ids, datetime.now())
    answer_counts = environment.read_more_items(
        'answer_count', user=user_id, items=item_ids, default=0)
    data = {}
    # subskills, predictions and item_ids line up index-by-index
    for subskill, predicted, item_id in zip(subskills, predictions, item_ids):
        data[subskill.identifier] = {
            'name': subskill.name,
            'prediction': predicted,
            'answer_count': answer_counts[item_id],
        }
    return JsonResponse({
        'structure': SKILL_TABLES[skill_identifier],
        'data': data
    })
def handle_dry(self, options):
    """Dry-run the predictive model over stored answers and write a report.

    Replays up to ``options['limit']`` answers (all answers when the limit is
    None) in batches of ``options['batch_size']``, collecting predicted versus
    actual correctness per answer, and dumps a Brier-score report as JSON
    into ``settings.DATA_DIR``.
    """
    info = self.load_environment_info(options['initial'], options['config_name'], True)
    # FIX: removed the dead "environment = InMemoryEnvironment(audit_enabled=False)"
    # assignment that was immediately overwritten by load_environment(info).
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # Prefetch reachable parent items too, so hierarchy lookups are warm.
    items += list(
        set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    with closing(connection.cursor()) as cursor:
        cursor.execute('SELECT COUNT(*) FROM proso_models_answer')
        answers_total = cursor.fetchone()[0]
        if options['limit'] is not None:
            answers_total = min(answers_total, options['limit'])
        print('total:', answers_total)
        processed = 0
        prediction = numpy.empty(answers_total)
        correct = numpy.empty(answers_total)
        while processed < answers_total:
            cursor.execute(
                '''
                SELECT
                    id, user_id, item_id, item_asked_id, item_answered_id,
                    time, response_time, guess
                FROM proso_models_answer
                ORDER BY id
                OFFSET %s LIMIT %s
                ''', [processed, options['batch_size']])
            for (answer_id, user, item, asked, answered, time, response_time, guess) in cursor:
                correct[processed] = asked == answered
                prediction[processed] = predictive_model.predict_and_update(
                    environment, user, item, asked == answered, time,
                    item_answered=answered,
                    item_asked=asked,
                    guess=guess,
                    answer_id=answer_id,
                    response_time=response_time,
                )
                environment.process_answer(
                    user, item, asked, answered, time, answer_id, response_time, guess)
                processed += 1
                if processed >= answers_total:
                    break
            print('processed:', processed)
    filename = settings.DATA_DIR + '/recompute_model_report_{}.json'.format(
        predictive_model.__class__.__name__)
    model_report = report(prediction, correct)
    with open(filename, 'w') as outfile:
        json.dump(model_report, outfile)
    print('Saving report to:', filename)
    brier_graphs(model_report['brier'], predictive_model)
def recompute(self, info, options):
    """Replay one batch of stored answers through the predictive model.

    Loads the environment described by ``info``, streams up to
    ``options['batch_size']`` answers starting at ``info.load_progress``,
    updates the model and the environment per answer, flushes the
    environment, and (when ``options['finish']``) promotes ``info`` to the
    active environment while disabling the previously active one.
    """
    print(' -- preparing phase')
    timer('recompute_prepare')  # start the preparation-phase stopwatch
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # also prefetch all reachable parent items of the batch's items
    items += list(
        set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    # model is built from this environment's own configuration
    predictive_model = get_predictive_model(info.to_json())
    print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
    timer('recompute_model')  # start the model-phase stopwatch
    print(' -- model phase')
    with closing(connection.cursor()) as cursor:
        # resume from info.load_progress: answers are replayed in id order
        cursor.execute(
            '''
            SELECT
                id,
                user_id,
                item_id,
                item_asked_id,
                item_answered_id,
                time,
                response_time,
                guess
            FROM proso_models_answer
            ORDER BY id
            OFFSET %s LIMIT %s
            ''', [info.load_progress, options['batch_size']])
        # update the progress bar roughly every 1% of the batch
        progress_bar = progress.bar(
            cursor,
            every=max(1, cursor.rowcount // 100),
            expected_size=cursor.rowcount)
        info.load_progress += cursor.rowcount
        for (answer_id, user, item, asked, answered, time, response_time,
                guess) in progress_bar:
            # correctness is "asked item equals answered item"
            predictive_model.predict_and_update(
                environment, user, item, asked == answered, time,
                item_answered=answered,
                item_asked=asked,
                guess=guess,
                answer_id=answer_id,
                response_time=response_time,
            )
            environment.process_answer(user, item, asked, answered, time,
                                       answer_id, response_time, guess)
    print(' -- model phase, time:', timer('recompute_model'), 'seconds')
    timer('recompute_flush')  # start the flushing-phase stopwatch
    print(' -- flushing phase')
    environment.flush(clean=options['finish'])
    print(' -- flushing phase, time:', timer('recompute_flush'),
          'seconds, total number of answers:', info.load_progress)
    if options['finish']:
        timer('recompute_finish')  # start the finishing-phase stopwatch
        print(' -- finishing phase')
        try:
            # demote the previously active environment (if any) and drop its
            # cache entry so the new one is picked up
            previous_info = EnvironmentInfo.objects.get(
                status=EnvironmentInfo.STATUS_ACTIVE)
            previous_info.status = EnvironmentInfo.STATUS_DISABLED
            previous_info.save()
            cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
        except EnvironmentInfo.DoesNotExist:
            pass
        info.status = EnvironmentInfo.STATUS_ACTIVE
        print(' -- finishing phase, time:', timer('recompute_finish'),
              'seconds')
    # persist load_progress (and, when finishing, the new active status)
    info.save()
def recalculate_concepts(self, concepts, lang=None):
    """Recalculate stats of the given concepts for the given users.

    Args:
        concepts (dict): user id -> set of concept ids to recalculate
        lang (Optional[str]): language used to fetch items in all concepts
            (cached). Defaults to None; in that case only items of the
            concepts actually used are fetched.
    """
    if len(concepts) == 0:
        return
    if lang is None:
        items = Concept.objects.get_concept_item_mapping(
            concepts=Concept.objects.filter(
                pk__in=set(flatten(concepts.values()))))
    else:
        items = Concept.objects.get_concept_item_mapping(lang=lang)
    environment = get_environment()
    mastery_threshold = get_mastery_trashold()
    # FIX: loop variable renamed -- the original "for user, concepts in
    # concepts.items()" shadowed the function argument.
    for user, user_concepts in concepts.items():
        all_items = list(set(flatten([items[c] for c in user_concepts])))
        answer_counts = dict(zip(
            all_items,
            environment.number_of_answers_more_items(all_items, user)))
        correct_answer_counts = dict(zip(
            all_items,
            environment.number_of_correct_answers_more_items(all_items, user)))
        predictions = dict(zip(
            all_items,
            get_predictive_model().predict_more_items(
                environment, user, all_items, time=None)))
        new_user_stats = []
        stats_to_delete_condition = Q()
        for concept in user_concepts:
            answer_aggregates = Answer.objects.filter(
                user=user, item__in=items[concept]).aggregate(
                    time_spent=Sum("response_time"),
                    sessions=Count("session", True),
                    time_first=Min("time"),
                    time_last=Max("time"),
                )
            # NOTE(review): the aggregates are None when the user has no
            # answers on the concept's items -- presumably callers only pass
            # practiced concepts; confirm before relying on it.
            stats = {
                "answer_count": sum(answer_counts[i] for i in items[concept]),
                "correct_answer_count": sum(
                    correct_answer_counts[i] for i in items[concept]),
                "item_count": len(items[concept]),
                "practiced_items_count": sum(
                    answer_counts[i] > 0 for i in items[concept]),
                "mastered_items_count": sum(
                    predictions[i] >= mastery_threshold for i in items[concept]),
                "prediction": sum(
                    predictions[i] for i in items[concept]) / len(items[concept]),
                "time_spent": answer_aggregates["time_spent"] / 1000,
                "session_count": answer_aggregates["sessions"],
                "time_first": answer_aggregates["time_first"].timestamp(),
                "time_last": answer_aggregates["time_last"].timestamp(),
            }
            stats_to_delete_condition |= Q(user=user, concept=concept)
            for stat_name, value in stats.items():
                new_user_stats.append(
                    UserStat(user_id=user, concept_id=concept,
                             stat=stat_name, value=value))
        # Replace the user's old stats: delete all matched rows, then
        # bulk-insert the freshly computed ones.
        self.filter(stats_to_delete_condition).delete()
        self.bulk_create(new_user_stats)