def _save_answers(request, practice_context, finish_practice_set):
    """
    Parse answers from the request body and persist them under a practice set.

    A new practice set is created when the user has no previous answer, when
    the last answer belongs to a different practice context, or when the last
    practice set is missing or already finished; otherwise the last answer's
    practice set is reused.  When the context changed, all practice sets
    attached to the user's earlier answers are marked finished first.

    Args:
        request: HTTP request carrying the answers and the authenticated user
        practice_context: practice context the new answers belong to
        finish_practice_set: when True, mark the chosen practice set finished

    Returns:
        list of saved answer instances

    Raises:
        BadRequestException: when an answer JSON object lacks "answer_class"
    """
    timer('_save_answers')  # start timing; the closing timer() call below reads elapsed time
    json_objects = _get_answers(request)
    answers = []
    # Only the single most recent answer is needed to decide whether its
    # practice set can be reused.
    last_answers = Answer.objects.prefetch_related('practice_set').filter(
        user_id=request.user.id).order_by('-id')[:1]
    if len(last_answers) == 0 or last_answers[
            0].context_id != practice_context.id or last_answers[
            0].practice_set is None or last_answers[
            0].practice_set.finished:
        if len(last_answers
               ) > 0 and last_answers[0].context_id != practice_context.id:
            # The user switched context: close every practice set still
            # attached to their previous answers.
            PracticeSet.objects.filter(answer__user_id=request.user.id).update(
                finished=True)
        practice_set = PracticeSet.objects.create()
    else:
        practice_set = last_answers[0].practice_set
    if finish_practice_set:
        practice_set.finished = True
        practice_set.save()
    for json_object in json_objects:
        if 'answer_class' not in json_object:
            raise BadRequestException(
                'The answer does not contain key "answer_class".')
        # Each answer declares its concrete model class; delegate JSON
        # deserialization to that class's manager.
        answer_class = Answer.objects.answer_class(json_object['answer_class'])
        answers.append(
            answer_class.objects.from_json(json_object, practice_context,
                                           practice_set, request.user.id))
    LOGGER.debug("saving of %s answers took %s seconds", len(answers),
                 timer('_save_answers'))
    return answers
def wrapper(*args, **kwargs):
    """
    Memoizing wrapper: look the call up in the per-request cache and the
    shared cache before invoking ``func``, and store the computed value
    afterwards.

    The cache key is derived from the function name and the repr of the
    arguments.  When the first positional argument has the default object
    repr (``<... object at 0x...>``), its class is used instead so the key
    is stable across instances.
    """
    # Never cache during test runs.
    if hasattr(settings, 'TESTING') and settings.TESTING:
        return func(*args, **kwargs)
    if len(args) > 0 and re.match(r"<.+ object at \w+>", repr(args[0])) is not None:
        # The default repr embeds the memory address, which would make the
        # key unstable; key on the class instead.
        key_args = [args[0].__class__] + list(args[1:])
    else:
        key_args = args
    key = "{}:args:{}-kwargs:{}".format(func.__name__, repr(key_args), repr(kwargs))
    hash_key = hashlib.sha1(key.encode()).hexdigest()
    if is_cache_prepared():
        value = get_request_cache().get(hash_key, CACHE_MISS)
        # FIX: CACHE_MISS is a sentinel, so compare by identity. Using `!=`
        # invoked the cached value's __eq__, which may be expensive or not
        # even return a bool (e.g. numpy arrays).
        if value is not CACHE_MISS:
            return value
    value = cache.get(hash_key, CACHE_MISS)
    if value is not CACHE_MISS:
        return value
    timer(hash_key)
    value = func(*args, **kwargs)
    if not self._request_only:
        cache.set(hash_key, value, self._expiration)
    if is_cache_prepared():
        get_request_cache().set(hash_key, value)
    return value
def wrapper(*args, **kwargs):
    """
    Memoizing wrapper: look the call up in the per-request cache and the
    shared cache before invoking ``func``, and store the computed value
    afterwards.

    The cache key is derived from the function name and the repr of the
    arguments.  When the first positional argument has the default object
    repr (``<... object at 0x...>``), its class is used instead so the key
    is stable across instances.
    """
    # Never cache during test runs.
    if hasattr(settings, 'TESTING') and settings.TESTING:
        return func(*args, **kwargs)
    if len(args) > 0 and re.match(r"<.+ object at \w+>", repr(
            args[0])) is not None:
        # The default repr embeds the memory address, which would make the
        # key unstable; key on the class instead.
        key_args = [args[0].__class__] + list(args[1:])
    else:
        key_args = args
    key = "{}:args:{}-kwargs:{}".format(func.__name__, repr(key_args), repr(kwargs))
    hash_key = hashlib.sha1(key.encode()).hexdigest()
    if is_cache_prepared():
        value = get_request_cache().get(hash_key, CACHE_MISS)
        # FIX: CACHE_MISS is a sentinel, so compare by identity. Using `!=`
        # invoked the cached value's __eq__, which may be expensive or not
        # even return a bool (e.g. numpy arrays).
        if value is not CACHE_MISS:
            return value
    value = cache.get(hash_key, CACHE_MISS)
    if value is not CACHE_MISS:
        return value
    timer(hash_key)
    value = func(*args, **kwargs)
    if not self._request_only:
        cache.set(hash_key, value, self._expiration)
    if is_cache_prepared():
        get_request_cache().set(hash_key, value)
    return value
def handle_recompute(self, options):
    """
    Recompute the environment and report the total time.

    When 'finish' is requested, refuse to run (unless 'force' is set) if the
    number of answers still to process is at least 'batch_size'.

    Args:
        options: command options; uses 'initial', 'config_name', 'finish',
            'batch_size' and 'force'
    """
    timer('recompute_all')
    info = self.load_environment_info(options['initial'], options['config_name'], False)
    if options['finish']:
        to_process = self.number_of_answers_to_process(info)
        # FIX: reuse the already computed count instead of issuing the
        # (potentially expensive) query a second time.
        if to_process >= options['batch_size'] and not options['force']:
            raise CommandError("There is more than allowed number of answers (%s) to process." % to_process)
    # Recomputation ran in both branches of the original if/else; run it once
    # unconditionally.
    self.recompute(info, options)
    print(' -- total time of recomputation:', timer('recompute_all'), 'seconds')
def handle_recompute(self, options):
    """
    Recompute the environment and report the total time.

    When 'finish' is requested, refuse to run (unless 'force' is set) if the
    number of answers still to process is at least 'batch_size'.

    Args:
        options: command options; uses 'initial', 'config_name', 'finish',
            'batch_size' and 'force'
    """
    timer('recompute_all')
    info = self.load_environment_info(options['initial'], options['config_name'], False)
    if options['finish']:
        to_process = self.number_of_answers_to_process(info)
        # FIX: reuse the already computed count instead of issuing the
        # (potentially expensive) query a second time.
        if to_process >= options['batch_size'] and not options['force']:
            raise CommandError(
                "There is more than allowed number of answers (%s) to process." % to_process)
    # Recomputation ran in both branches of the original if/else; run it once
    # unconditionally.
    self.recompute(info, options)
    print(' -- total time of recomputation:', timer('recompute_all'), 'seconds')
def to_practice_counts(request):
    """
    Get number of items available to practice.

    filters:
      -- use this or body json as in BODY
    language:
      language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    # The filters may arrive either in the POST body or in the query string;
    # the query string takes precedence.
    filters = None
    if request.method == "POST":
        filters = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        filters = load_query_json(request.GET, "filters")
    if not filters:
        # Nothing requested: answer with the endpoint's help text only.
        return render_json(request, {}, template='models_json.html',
                           help_text=to_practice_counts.__doc__)
    language = get_language(request)
    timer('to_practice_counts')
    # Sort by identifier so names and filters stay aligned after the zip.
    group_names, group_filters = list(zip(*sorted(filters.items())))
    leaves_per_group = Item.objects.filter_all_reachable_leaves_many(group_filters, language)
    result = {}
    for group_name, leaves in zip(group_names, leaves_per_group):
        result[group_name] = {
            'filter': filters[group_name],
            'number_of_items': len(leaves),
        }
    LOGGER.debug("to_practice_counts - getting items in groups took %s seconds",
                 timer('to_practice_counts'))
    return render_json(request, result, template='models_json.html',
                       help_text=to_practice_counts.__doc__)
def handle_validate(self, options):
    """
    Validate variable uniqueness for the loaded environment info.

    Each (key, user, primary item, secondary item) combination must occur at
    most once in proso_models_variable.  Violations are printed and the
    process exits with an error; otherwise the validation time is reported.
    """
    timer('recompute_validation')
    info = self.load_environment_info(options['initial'], options['config_name'], False)
    with closing(connection.cursor()) as cursor:
        cursor.execute(
            '''
            SELECT key, user_id, item_primary_id, item_secondary_id
            FROM proso_models_variable
            WHERE info_id = %s
            GROUP BY 1, 2, 3, 4
            HAVING COUNT(*) > 1
            ''', [info.id])
        fetched = cursor.fetchall()
        if len(fetched) > 0:
            # FIX: the original never called .format(), so the literal '{}'
            # was printed instead of the violation count.
            print(' -- there are {} violations of variable uniqueness:'.format(len(fetched)))
            for key, user, primary, secondary in fetched:
                print(' - ', key, user, primary, secondary)
            sys.exit('canceling due to previous error')
        else:
            print(' -- validation passed:', timer('recompute_validation'), 'seconds')
def _save_answers(request, practice_context, finish_practice_set):
    """
    Deserialize the answers posted in the request and store them, attaching
    them to a practice set that is either reused from the user's most recent
    answer or freshly created.
    """
    timer('_save_answers')
    parsed_answers = _get_answers(request)
    # Only the newest answer matters for the practice-set decision.
    previous = Answer.objects.prefetch_related('practice_set').filter(user_id=request.user.id).order_by('-id')[:1]
    reuse_previous_set = (
        len(previous) > 0
        and previous[0].context_id == practice_context.id
        and previous[0].practice_set is not None
        and not previous[0].practice_set.finished
    )
    if reuse_previous_set:
        practice_set = previous[0].practice_set
    else:
        if len(previous) > 0 and previous[0].context_id != practice_context.id:
            # Context changed: close all practice sets hanging on the user's
            # earlier answers before opening a new one.
            PracticeSet.objects.filter(answer__user_id=request.user.id).update(finished=True)
        practice_set = PracticeSet.objects.create()
    if finish_practice_set:
        practice_set.finished = True
        practice_set.save()
    saved = []
    for parsed in parsed_answers:
        if 'answer_class' not in parsed:
            raise BadRequestException('The answer does not contain key "answer_class".')
        # The declared answer class knows how to build itself from JSON.
        answer_model = Answer.objects.answer_class(parsed['answer_class'])
        saved.append(answer_model.objects.from_json(parsed, practice_context, practice_set, request.user.id))
    LOGGER.debug("saving of %s answers took %s seconds", len(saved), timer('_save_answers'))
    return saved
def handle_gc(self, options):
    """
    Garbage-collect data of disabled environments: delete their variable,
    audit and environment-info rows, then vacuum the affected tables when
    running on PostgreSQL.
    """
    timer('recompute_gc')
    print(' -- collecting garbage')
    to_gc = [
        str(x.id) for x in EnvironmentInfo.objects.filter(
            status=EnvironmentInfo.STATUS_DISABLED).all()
    ]
    if not to_gc:
        print(' -- no environment info to collect')
        return
    # NOTE: the ids are interpolated directly into the SQL; they come from
    # the database as integer primary keys, so no untrusted input reaches
    # the statement.
    to_gc_str = ','.join(to_gc)
    with closing(connection.cursor()) as cursor:
        cursor.execute(
            'DELETE FROM proso_models_variable WHERE info_id IN (%s)' % to_gc_str)
        variables = cursor.rowcount
        cursor.execute(
            'DELETE FROM proso_models_audit WHERE info_id IN (%s)' % to_gc_str)
        audits = cursor.rowcount
        cursor.execute(
            'DELETE FROM proso_models_environmentinfo WHERE id IN (%s)' % to_gc_str)
        infos = cursor.rowcount
        if is_on_postgresql():
            # VACUUM FULL reclaims the disk space freed by the bulk deletes.
            timer('recompute_vacuum')
            cursor.execute(
                'VACUUM FULL ANALYZE VERBOSE proso_models_variable')
            cursor.execute(
                'VACUUM FULL ANALYZE VERBOSE proso_models_audit')
            print(' -- vacuum phase, time:', timer('recompute_vacuum'), 'seconds')
    print(' -- collecting garbage, time:', timer('recompute_gc'), 'seconds, deleted', variables, 'variables,', audits, 'audit records,', infos, 'environment info records')
def to_practice_counts(request):
    """
    Get number of items available to practice.

    filters:
      -- use this or body json as in BODY
    language:
      language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    # Filters may come from the POST body or the query string; the query
    # string takes precedence when both are present.
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None or len(data) == 0:
        # Nothing requested: answer with the endpoint's help text only.
        return render_json(request, {}, template='models_json.html', help_text=to_practice_counts.__doc__)
    language = get_language(request)
    timer('to_practice_counts')
    # Sorting by identifier keeps names aligned with their filters.
    filter_names, filter_filters = list(zip(*sorted(data.items())))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(filter_filters, language)
    response = {
        group_id: {
            'filter': data[group_id],
            'number_of_items': len(items),
        }
        for group_id, items in zip(filter_names, reachable_leaves)
    }
    LOGGER.debug("to_practice_counts - getting items in groups took %s seconds", (timer('to_practice_counts')))
    return render_json(request, response, template='models_json.html', help_text=to_practice_counts.__doc__)
def handle_gc(self, options):
    """
    Garbage-collect data of disabled environments: delete their variable,
    audit and environment-info rows, then vacuum the affected tables when
    running on PostgreSQL.
    """
    timer('recompute_gc')
    print(' -- collecting garbage')
    to_gc = [str(x.id) for x in EnvironmentInfo.objects.filter(status=EnvironmentInfo.STATUS_DISABLED).all()]
    if not to_gc:
        print(' -- no environment info to collect')
        return
    # NOTE: the ids are interpolated directly into the SQL; they come from
    # the database as integer primary keys, so no untrusted input reaches
    # the statement.
    to_gc_str = ','.join(to_gc)
    with closing(connection.cursor()) as cursor:
        cursor.execute('DELETE FROM proso_models_variable WHERE info_id IN (%s)' % to_gc_str)
        variables = cursor.rowcount
        # FIX: also delete audit records; the original left rows in
        # proso_models_audit referencing the environment-info ids deleted
        # below (the sibling implementation of this command does delete them).
        cursor.execute('DELETE FROM proso_models_audit WHERE info_id IN (%s)' % to_gc_str)
        audits = cursor.rowcount
        cursor.execute('DELETE FROM proso_models_environmentinfo WHERE id IN (%s)' % to_gc_str)
        infos = cursor.rowcount
        if is_on_postgresql():
            # VACUUM FULL reclaims the disk space freed by the bulk deletes.
            timer('recompute_vacuum')
            cursor.execute('VACUUM FULL ANALYZE VERBOSE proso_models_variable')
            cursor.execute('VACUUM FULL ANALYZE VERBOSE proso_models_audit')
            print(' -- vacuum phase, time:', timer('recompute_vacuum'), 'seconds')
    print(' -- collecting garbage, time:', timer('recompute_gc'), 'seconds, deleted', variables, 'variables,', audits, 'audit records,', infos, 'environment info records')
def user_stats(request):
    """
    Get user statistics for selected groups of items

    time:
      time in format '%Y-%m-%d_%H:%M:%S' used for practicing
    user:
      identifier of the user (only for staff users)
    username:
      username of user (only for users with public profile)
    filters:
      -- use this or body json as in BODY
    mastered:
      use model to compute number of mastered items - can be slow
    language:
      language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    timer('user_stats')
    response = {}
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None:
        # No filters given: answer with the endpoint's help text only.
        return render_json(request, {}, template='models_user_stats.html', help_text=user_stats.__doc__)
    environment = get_environment()
    # Allow overriding "now", e.g. for replaying historical state.
    if is_time_overridden(request):
        environment.shift_time(get_time(request))
    user_id = get_user_id(request)
    language = get_language(request)
    # Sorting by identifier keeps names aligned with their filters.
    filter_names, filter_filters = list(zip(*sorted(data.items())))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(
        filter_filters, language)
    # Flatten to one de-duplicated list so counts can be fetched in bulk.
    all_leaves = sorted(list(set(flatten(reachable_leaves))))
    answers = environment.number_of_answers_more_items(all_leaves, user_id)
    correct_answers = environment.number_of_correct_answers_more_items(
        all_leaves, user_id)
    if request.GET.get("mastered"):
        timer('user_stats_mastered')
        # NOTE: the helper's name contains a typo ("trashold"); it is defined
        # elsewhere, so it cannot be renamed here.
        mastery_threshold = get_mastery_trashold()
        predictions = Item.objects.predict_for_overview(
            environment, user_id, all_leaves)
        mastered = dict(
            list(zip(all_leaves, [p >= mastery_threshold for p in predictions])))
        LOGGER.debug(
            "user_stats - getting predictions for items took %s seconds",
            (timer('user_stats_mastered')))
    for identifier, items in zip(filter_names, reachable_leaves):
        if len(items) == 0:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_items": 0,
            }
        else:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_items": len(items),
                "number_of_practiced_items": sum(answers[i] > 0 for i in items),
                "number_of_answers": sum(answers[i] for i in items),
                "number_of_correct_answers": sum(correct_answers[i] for i in items),
            }
            if request.GET.get("mastered"):
                response[identifier]["number_of_mastered_items"] = sum(
                    mastered[i] for i in items)
    return render_json(request, response, template='models_user_stats.html', help_text=user_stats.__doc__)
def recompute(self, info, options):
    """
    Run one batch of the recomputation pipeline: prefetch the environment,
    replay stored answers through the predictive model, flush the
    environment, and — when 'finish' is requested — activate the recomputed
    environment info (disabling the previously active one).

    Args:
        info: EnvironmentInfo record being (re)computed; its load_progress
            tracks how many answers have been replayed so far
        options: command options; uses 'batch_size' and 'finish'
    """
    print(' -- preparing phase')
    timer('recompute_prepare')
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # Include every parent reachable from the items so their values are
    # prefetched as well.
    items += list(
        set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
    timer('recompute_model')
    print(' -- model phase')
    with closing(connection.cursor()) as cursor:
        # Resume at the stored offset and replay at most one batch of answers
        # in id order.
        cursor.execute(
            '''
            SELECT
                id, user_id, item_id, item_asked_id, item_answered_id,
                time, response_time, guess
            FROM proso_models_answer
            ORDER BY id
            OFFSET %s LIMIT %s
            ''', [info.load_progress, options['batch_size']])
        progress_bar = progress.bar(cursor, every=max(1, cursor.rowcount // 100), expected_size=cursor.rowcount)
        info.load_progress += cursor.rowcount
        for (answer_id, user, item, asked, answered, time, response_time, guess) in progress_bar:
            # Replay the answer: update the model's predictions ...
            predictive_model.predict_and_update(
                environment,
                user,
                item,
                asked == answered,
                time,
                item_answered=answered,
                item_asked=asked,
                guess=guess,
                answer_id=answer_id,
                response_time=response_time,
            )
            # ... and the environment statistics.
            environment.process_answer(user, item, asked, answered, time, answer_id, response_time, guess)
    print(' -- model phase, time:', timer('recompute_model'), 'seconds')
    timer('recompute_flush')
    print(' -- flushing phase')
    environment.flush(clean=options['finish'])
    print(' -- flushing phase, time:', timer('recompute_flush'), 'seconds, total number of answers:', info.load_progress)
    if options['finish']:
        timer('recompute_finish')
        print(' -- finishing phase')
        try:
            # Swap the active environment info: disable the previous one and
            # drop its cache entry before activating this one.
            previous_info = EnvironmentInfo.objects.get(
                status=EnvironmentInfo.STATUS_ACTIVE)
            previous_info.status = EnvironmentInfo.STATUS_DISABLED
            previous_info.save()
            cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
        except EnvironmentInfo.DoesNotExist:
            pass
        info.status = EnvironmentInfo.STATUS_ACTIVE
        print(' -- finishing phase, time:', timer('recompute_finish'),
              'seconds')
    # Persist progress (and, when finishing, the new active status).
    info.save()
def user_stats(request):
    """
    Get user statistics for selected groups of items

    time:
      time in format '%Y-%m-%d_%H:%M:%S' used for practicing
    user:
      identifier of the user (only for staff users)
    username:
      username of user (only for users with public profile)
    filters:
      -- use this or body json as in BODY
    mastered:
      use model to compute number of mastered items - can be slow
    language:
      language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    timer('user_stats')
    response = {}
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None:
        # No filters given: answer with the endpoint's help text only.
        return render_json(request, {}, template='models_user_stats.html', help_text=user_stats.__doc__)
    environment = get_environment()
    # Allow overriding "now", e.g. for replaying historical state.
    if is_time_overridden(request):
        environment.shift_time(get_time(request))
    user_id = get_user_id(request)
    language = get_language(request)
    # Sorting by identifier keeps names aligned with their filters.
    filter_names, filter_filters = list(zip(*sorted(data.items())))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(filter_filters, language)
    # Flatten to one de-duplicated list so counts can be fetched in bulk.
    all_leaves = sorted(list(set(flatten(reachable_leaves))))
    answers = environment.number_of_answers_more_items(all_leaves, user_id)
    correct_answers = environment.number_of_correct_answers_more_items(all_leaves, user_id)
    if request.GET.get("mastered"):
        timer('user_stats_mastered')
        # NOTE: the helper's name contains a typo ("trashold"); it is defined
        # elsewhere, so it cannot be renamed here.
        mastery_threshold = get_mastery_trashold()
        predictions = Item.objects.predict_for_overview(environment, user_id, all_leaves)
        mastered = dict(list(zip(all_leaves, [p >= mastery_threshold for p in predictions])))
        LOGGER.debug("user_stats - getting predictions for items took %s seconds", (timer('user_stats_mastered')))
    for identifier, items in zip(filter_names, reachable_leaves):
        if len(items) == 0:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_items": 0,
            }
        else:
            response[identifier] = {
                "filter": data[identifier],
                "number_of_items": len(items),
                "number_of_practiced_items": sum(answers[i] > 0 for i in items),
                "number_of_answers": sum(answers[i] for i in items),
                "number_of_correct_answers": sum(correct_answers[i] for i in items),
            }
            if request.GET.get("mastered"):
                response[identifier]["number_of_mastered_items"] = sum(mastered[i] for i in items)
    return render_json(request, response, template='models_user_stats.html', help_text=user_stats.__doc__)
def recompute(self, info, options):
    """
    Run one batch of the recomputation pipeline: prefetch the environment,
    replay stored answers through the predictive model, flush the
    environment, and — when 'finish' is requested — activate the recomputed
    environment info (disabling the previously active one).

    Args:
        info: EnvironmentInfo record being (re)computed; its load_progress
            tracks how many answers have been replayed so far
        options: command options; uses 'batch_size' and 'finish'
    """
    print(' -- preparing phase')
    timer('recompute_prepare')
    environment = self.load_environment(info)
    users, items = self.load_user_and_item_ids(info, options['batch_size'])
    # Include every parent reachable from the items so their values are
    # prefetched as well.
    items += list(set(flatten(Item.objects.get_reachable_parents(items).values())))
    environment.prefetch(users, items)
    predictive_model = get_predictive_model(info.to_json())
    print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
    timer('recompute_model')
    print(' -- model phase')
    with closing(connection.cursor()) as cursor:
        # Resume at the stored offset and replay at most one batch of answers
        # in id order.
        cursor.execute(
            '''
            SELECT
                id, user_id, item_id, item_asked_id, item_answered_id,
                time, response_time, guess
            FROM proso_models_answer
            ORDER BY id
            OFFSET %s LIMIT %s
            ''', [info.load_progress, options['batch_size']])
        progress_bar = progress.bar(cursor, every=max(1, cursor.rowcount // 100), expected_size=cursor.rowcount)
        info.load_progress += cursor.rowcount
        for (answer_id, user, item, asked, answered, time, response_time, guess) in progress_bar:
            # Replay the answer: update the model's predictions ...
            predictive_model.predict_and_update(
                environment,
                user,
                item,
                asked == answered,
                time,
                item_answered=answered,
                item_asked=asked,
                guess=guess,
                answer_id=answer_id,
                response_time=response_time,
            )
            # ... and the environment statistics.
            environment.process_answer(user, item, asked, answered, time, answer_id, response_time, guess)
    print(' -- model phase, time:', timer('recompute_model'), 'seconds')
    timer('recompute_flush')
    print(' -- flushing phase')
    environment.flush(clean=options['finish'])
    print(' -- flushing phase, time:', timer('recompute_flush'), 'seconds, total number of answers:', info.load_progress)
    if options['finish']:
        timer('recompute_finish')
        print(' -- finishing phase')
        try:
            # Swap the active environment info: disable the previous one and
            # drop its cache entry before activating this one.
            previous_info = EnvironmentInfo.objects.get(status=EnvironmentInfo.STATUS_ACTIVE)
            previous_info.status = EnvironmentInfo.STATUS_DISABLED
            previous_info.save()
            cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
        except EnvironmentInfo.DoesNotExist:
            pass
        info.status = EnvironmentInfo.STATUS_ACTIVE
        print(' -- finishing phase, time:', timer('recompute_finish'),
              'seconds')
    # Persist progress (and, when finishing, the new active status).
    info.save()