Example #1
0
def enrich_json_objects_by_object_type(request, value):
    """
    Take the given value and start enrichment by object_type. The value is
    converted to JSON first when it is not a dict or a list already.

    Args:
        request (django.http.request.HttpRequest): request which is currently processed
        value (dict|list|django.db.models.Model):
            in case of django.db.models.Model object (or list of these
            objects), to_json method is invoked

    Returns:
        dict|list
    """
    time_start_globally = time()
    if isinstance(value, list):
        json = [item.to_json() if hasattr(item, "to_json") else item for item in value]
    elif isinstance(value, dict):
        json = value
    else:
        json = value.to_json()
    objects, nested = _collect_json_objects(json, by='object_type')
    for enricher_info in _get_OBJECT_TYPE_ENRICHER_ORDER():
        object_types = enricher_info['object_types']
        if len(object_types) > 0:
            # enricher registered for specific object types only
            enricher_objects = flatten([objects.get(t, []) for t in object_types])
            enricher_nested = any([nested.get(t, False) for t in object_types])
        else:
            # enricher registered for all object types
            enricher_objects = flatten(objects.values())
            enricher_nested = any(nested.values())
        if len(enricher_objects) > 0:
            time_start = time()
            enricher_info['enricher'](request, enricher_objects, enricher_nested)
            LOGGER.debug('enrichment "{}" took {} seconds'.format(
                enricher_info['enricher_name'], time() - time_start))
            # an impure enricher may change object types, so the grouping
            # has to be rebuilt before the next enricher runs
            if not enricher_info['pure']:
                objects, nested = _collect_json_objects(json, by='object_type')
    LOGGER.debug(
        'The whole enrichment of json objects by their object_type took {} seconds.'
        .format(time() - time_start_globally))
    return json
def _get_OBJECT_TYPE_ENRICHER_ORDER():
    """
    Return enrichers topologically sorted so each runs after its dependencies.

    Computed lazily via DFS over the dependency graph (roots visited by
    ascending 'priority') and cached in the module-level
    _OBJECT_TYPE_ENRICHER_ORDER; the lock guards the cache.
    """
    with _OBJECT_TYPE_ENRICHERS_LOCK:
        global _OBJECT_TYPE_ENRICHER_ORDER
        if _OBJECT_TYPE_ENRICHER_ORDER is None:
            global _OBJECT_TYPE_ENRICHERS
            visited = set()
            stack = set()
            order = []
            enrichers = _OBJECT_TYPE_ENRICHERS
            # enrichers referenced as a dependency of some other enricher
            refs = set(flatten([enricher_info['dependencies'] for enricher_info in enrichers.values()]))
            roots = set(enrichers.keys()) - refs

            def _visit(enricher_info):
                # post-order DFS: append a node after all its dependencies
                if enricher_info['enricher_name'] in visited:
                    return
                # NOTE(review): nodes enter 'visited' before their deps are
                # processed, so a back-edge returns on the check above and
                # this raise looks unreachable — confirm cycle detection.
                if enricher_info['enricher_name'] in stack:
                    raise Exception('There is a cycle in dependencies of enrichers.')
                stack.add(enricher_info['enricher_name'])
                visited.add(enricher_info['enricher_name'])
                for enricher_dep in sorted(enricher_info['dependencies'], key=lambda name: enrichers[name]['priority']):
                    _visit(enrichers[enricher_dep])
                stack.remove(enricher_info['enricher_name'])
                order.append(enricher_info)

            for enricher_name, enricher_info in sorted(enrichers.items(), key=lambda x: x[1]['priority']):
                if enricher_name not in roots:
                    continue
                _visit(enricher_info)
            # NOTE(review): sorting 'order' by each element's own index is a
            # no-op; 'order' already has the final ordering.
            indexes = dict([(enricher_info['enricher_name'], i) for (i, enricher_info) in enumerate(order)])
            _OBJECT_TYPE_ENRICHER_ORDER = sorted(order, key=lambda e: indexes[e['enricher_name']])
        return _OBJECT_TYPE_ENRICHER_ORDER
 def _load_item_relations(self, data, db_objects, categories_json_key):
     """
     Build the item parent subgraph (categories) from the loaded JSON
     objects and store it.

     Args:
         data (list): loaded JSON objects; each carries an "id" and
             localized "name-xx" keys
         db_objects (dict): maps (identifier, lang) to the saved DB object
         categories_json_key (str): JSON key holding parent category ids

     NOTE(review): assumes every JSON object has at least one "name-xx"
     key and that all objects share at least one common language — TODO
     confirm with the loader.
     """
     print("\nFilling item types")
     call_command('fill_item_types')
     print("\nBuilding dependencies")
     parent_subgraph = {}
     lang_intersect = None
     # integer division: 'every' should be an int; plain '/' under
     # Python 3 yields a float and breaks the progress bar cadence
     for json_object in progress.bar(data, every=max(1, len(data) // 100)):
         # The language is not important here.
         langs = [
             k[-2:] for k in json_object.keys()
             if re.match(r'^name-\w\w$', k)
         ]
         lang_intersect = set(
             langs
         ) if lang_intersect is None else lang_intersect & set(langs)
         lang = langs[0]
         db_object = db_objects[json_object["id"], lang]
         parent_items = parent_subgraph.get(db_object.item_id, set())
         for parent in json_object.get(categories_json_key, []):
             parent_items.add('proso_flashcards_category/{}'.format(parent))
         parent_subgraph[db_object.item_id] = parent_items
     lang = lang_intersect.pop()
     translated = Item.objects.translate_identifiers(
         flatten(parent_subgraph.values()), lang)
     Item.objects.override_parent_subgraph({
         item: [translated[parent] for parent in parents]
         for item, parents in parent_subgraph.items()
     })
 def _load_item_relations(self, data, db_objects, categories_json_key):
     """
     Build the item parent subgraph (categories, context, terms) from the
     loaded JSON objects and store it.

     Args:
         data (list): loaded JSON flashcard objects
         db_objects (dict): maps (identifier, lang) to the saved DB object
         categories_json_key (str): JSON key holding parent category ids
     """
     db_objects_processed = {}
     for (identifier, lang), db_object in db_objects.items():
         _, found_langs = db_objects_processed.get(identifier, (None, []))
         db_objects_processed[identifier] = db_object, found_langs + [lang]
     print("\nFilling item types")
     call_command('fill_item_types')
     print("\nBuilding dependencies")
     parent_subgraph = {}
     lang_intersect = None
     # integer division: 'every' should be an int; plain '/' under
     # Python 3 yields a float and breaks the progress bar cadence
     for json_object in progress.bar(data, every=max(1, len(data) // 100)):
         db_object, langs = db_objects_processed[json_object["id"]]
         # The language is not important here.
         lang_intersect = set(langs) if lang_intersect is None else lang_intersect & set(langs)
         parent_items = parent_subgraph.get(db_object.item_id, set())
         for parent in json_object.get(categories_json_key, []):
             parent_items.add('proso_flashcards_category/{}'.format(parent))
         if 'context' in json_object:
             parent_items.add('proso_flashcards_context/{}'.format(json_object['context']))
         if 'term' in json_object:
             parent_items.add('proso_flashcards_term/{}'.format(json_object['term']))
         if 'term-secondary' in json_object:
             parent_items.add('proso_flashcards_term/{}'.format(json_object['term-secondary']))
         parent_subgraph[db_object.item_id] = parent_items
     lang = lang_intersect.pop()
     translated = Item.objects.translate_identifiers(
         flatten(parent_subgraph.values()), lang
     )
     Item.objects.override_parent_subgraph({
         item: [translated[parent] for parent in parents]
         for item, parents in parent_subgraph.items()
     })
def context_flashcards(request, json_list, nested):
    """Attach the list of leaf flashcards to a single (non-nested) context.

    Enricher: does nothing for nested objects or when more than one
    context is being enriched at once.
    """
    if nested or len(json_list) > 1:
        return
    item_ids = [context['item_id'] for context in json_list]
    leave_items = Item.objects.get_leaves(item_ids)
    lang = json_list[0]['lang']
    translated = Item.objects.translate_item_ids(flatten(leave_items.values()), lang)
    for context in json_list:
        leaf_ids = leave_items[context['item_id']]
        context['flashcards'] = [translated[leaf_id] for leaf_id in leaf_ids]
 def handle_dry(self, options):
     """
     Replay all answers through the predictive model without persisting
     the resulting environment, then write a JSON accuracy report and
     plot Brier graphs.

     Args:
         options (dict): command options; uses 'initial', 'config_name',
             'batch_size' and 'limit'
     """
     info = self.load_environment_info(options['initial'], options['config_name'], True)
     # NOTE(review): this InMemoryEnvironment is immediately overwritten by
     # load_environment(info) on the next line — looks like dead code or a
     # leftover; confirm which environment is intended.
     environment = InMemoryEnvironment()
     environment = self.load_environment(info)
     users, items = self.load_user_and_item_ids(info, options['batch_size'])
     # prefetch reachable parents too; the model may read their state
     items += list(set(flatten(Item.objects.get_reachable_parents(items).values())))
     environment.prefetch(users, items)
     predictive_model = get_predictive_model(info.to_json())
     with closing(connection.cursor()) as cursor:
         cursor.execute('SELECT COUNT(*) FROM proso_models_answer')
         answers_total = cursor.fetchone()[0]
         if options['limit'] is not None:
             answers_total = min(answers_total, options['limit'])
         print('total:', answers_total)
         processed = 0
         prediction = numpy.empty(answers_total)
         correct = numpy.empty(answers_total)
         # page through answers in id order, batch_size rows at a time
         while processed < answers_total:
             cursor.execute(
                 '''
                 SELECT
                     id,
                     user_id,
                     item_id,
                     item_asked_id,
                     item_answered_id,
                     time,
                     response_time,
                     guess
                 FROM proso_models_answer
                 ORDER BY id
                 OFFSET %s LIMIT %s
                 ''', [processed, options['batch_size']])
             # NOTE: the loop variable 'time' shadows the time() function here
             for (answer_id, user, item, asked, answered, time, response_time, guess) in cursor:
                 correct[processed] = asked == answered
                 prediction[processed] = predictive_model.predict_and_update(
                     environment,
                     user,
                     item,
                     asked == answered,
                     time,
                     item_answered=answered,
                     item_asked=asked,
                     guess=guess,
                     answer_id=answer_id,
                     response_time=response_time,
                 )
                 environment.process_answer(user, item, asked, answered, time, answer_id, response_time, guess)
                 processed += 1
                 if processed >= answers_total:
                     break
             print('processed:', processed)
     filename = settings.DATA_DIR + '/recompute_model_report_{}.json'.format(predictive_model.__class__.__name__)
     model_report = report(prediction, correct)
     with open(filename, 'w') as outfile:
         json.dump(model_report, outfile)
     print('Saving report to:', filename)
     brier_graphs(model_report['brier'], predictive_model)
    def get_option_for_flashcards(self, flashcards_with_question_types):
        """
        Compute, for each flashcard, the item ids usable as distractor
        options within the flashcard's context.

        Args:
            flashcards_with_question_types: list of (flashcard JSON,
                question type) pairs

        Returns:
            dict: flashcard item id -> list of option item ids
        """
        question_types = {fc['id']: question_type for fc, question_type in flashcards_with_question_types}
        opt_set_cache = cache.get('flashcard_construction__context_option_set', {})
        # NOTE(review): the cache is probed with (item_id, question_type)
        # tuples but populated below with plain item ids — these lookups can
        # never hit; confirm the intended cache key.
        to_find = [fc for (fc, question_type) in flashcards_with_question_types if (fc['item_id'], question_type) not in opt_set_cache]
        if len(to_find) > 0:
            context_ids = {self.get_context_id(flashcard) for flashcard in to_find}
            types_all_item_ids = set([c.item_id for c in Category.objects.filter(type='flashcard_type')])
            flashcard_item_ids = set([flashcard['item_id'] for flashcard in to_find])
            reachable_parents = Item.objects.get_reachable_parents(flashcard_item_ids, language=to_find[0]['lang'])
            # flashcard item id -> set of 'flashcard_type' category items above it
            flashcard_types = {item_id: set(reachable_parents.get(item_id, [])) & types_all_item_ids for item_id in flashcard_item_ids}

            context_item_ids = dict(Context.objects.filter(pk__in=context_ids).values_list('id', 'item_id'))

            secondary_terms = dict(Flashcard.objects.all().values_list('item_id', 'term_secondary_id'))
            # candidate options: leaves shared by the flashcard's context and
            # all of its types, filtered to match term-secondary presence
            found = {
                flashcard['item_id']: [i for i in reduce(
                    lambda xs, ys: set(xs) & set(ys),
                    Item.objects.get_leaves({context_item_ids[self.get_context_id(flashcard)]} | flashcard_types[flashcard['item_id']], language=flashcard['lang']).values()
                ) if (secondary_terms.get(i) is not None) == ('term_secondary' in flashcard)]
                for flashcard in to_find
            }
            if any(['term_secondary' in flashcard for flashcard in to_find]):
                # exclude options:
                #     1) with duplicate term/term_secondary
                #     2) with the same question but different answer
                translated = Item.objects.translate_item_ids(set(flatten(found.values())), language=to_find[0]['lang'])
                fc_dict = {flashcard['item_id']: flashcard for flashcard in to_find}
                found_translated = {
                    item_id: [translated[opt_id] for opt_id in options]
                    for item_id, options in found.items()
                }
                found = {}
                for fc_item_id, options in found_translated.items():
                    fc = fc_dict[fc_item_id]
                    if question_types[fc['id']] == FlashcardAnswer.FROM_TERM_TO_TERM_SECONDARY:
                        key_to = 'term_secondary'
                        key_from = 'term'
                    elif question_types[fc['id']] == FlashcardAnswer.FROM_TERM_SECONDARY_TO_TERM:
                        key_to = 'term'
                        key_from = 'term_secondary'
                    else:
                        # question type without term direction: keep all options
                        found[fc['item_id']] = [opt['item_id'] for opt in options]
                        continue
                    options_by_keys = {}
                    for opt in sorted(options, key=lambda o: o['identifier']):
                        # skip options sharing the question side with the flashcard
                        if self.get_context_id(fc) == opt['context_id'] and fc[key_from]['identifier'] == opt[key_from]['identifier']:
                            continue
                        options_by_keys[opt[key_to]['item_id']] = opt
                    # never offer the correct answer itself as a distractor
                    if fc[key_to]['item_id'] in options_by_keys:
                        del options_by_keys[fc[key_to]['item_id']]
                    found[fc['item_id']] = [opt['item_id'] for opt in options_by_keys.values()]

            # trying to decrease probability of race condition
            opt_set_cache = cache.get('flashcard_construction__context_option_set', {})
            opt_set_cache.update(found)
            cache.set('flashcard_construction__context_option_set', opt_set_cache)
        return {fc['item_id']: opt_set_cache[fc['item_id']] for fc, _ in flashcards_with_question_types}
Example #8
0
def context_flashcards(request, json_list, nested):
    """Attach the list of leaf flashcards to a single (non-nested) context.

    Enricher: skipped for nested objects or when more than one context is
    being enriched at once.
    """
    if nested or len(json_list) > 1:
        return
    leave_items = Item.objects.get_leaves([c['item_id'] for c in json_list])
    translated = Item.objects.translate_item_ids(flatten(leave_items.values()),
                                                 json_list[0]['lang'])
    for context in json_list:
        context['flashcards'] = [
            translated[i] for i in leave_items[context['item_id']]
        ]
 def handle(self, *args, **options):
     """Print the children graph of the given root items as JSON.

     Item ids are rendered as "object_type/identifier" strings.
     """
     roots = options['roots']
     lang = options['lang']
     if len(roots) == 0:
         raise Exception('At least one root has to be specified.')
     translated_roots = Item.objects.translate_identifiers(roots, lang)
     graph = Item.objects.get_children_graph(list(translated_roots.values()), language=lang)
     id_translation = Item.objects.translate_item_ids(flatten(graph.values()), language=lang, is_nested=True)
     translated_items = {}
     for item_id, json_object in id_translation.items():
         translated_items[item_id] = '{}/{}'.format(json_object['object_type'], json_object['identifier'])
     translated_graph = {
         translated_items.get(parent): [translated_items.get(child) for child in children]
         for parent, children in graph.items()
     }
     print(json.dumps(translated_graph))
Example #10
0
    def recalculate_concepts(self, concepts, lang=None):
        """
        Recalculated given concepts for given users

        Args:
            concepts (dict): user id (int -> set of concepts to recalculate)
            lang(Optional[str]): language used to get items in all concepts (cached).
                Defaults to None, in that case are get items only in used concepts
        """
        if len(concepts) == 0:
            return

        if lang is None:
            items = Concept.objects.get_concept_item_mapping(concepts=Concept.objects.filter(pk__in=set(flatten(concepts.values()))))
        else:
            items = Concept.objects.get_concept_item_mapping(lang=lang)

        environment = get_environment()
        mastery_threshold = get_mastery_trashold()
        # NOTE: the loop variable below shadows the 'concepts' parameter
        for user, concepts in concepts.items():
            all_items = list(set(flatten([items[c] for c in concepts])))
            # per-item counts and predictions fetched in bulk for this user
            answer_counts = dict(list(zip(all_items, environment.number_of_answers_more_items(all_items, user))))
            correct_answer_counts = dict(list(zip(all_items,
                                                  environment.number_of_correct_answers_more_items(all_items, user))))
            predictions = dict(list(zip(all_items, get_predictive_model().
                                        predict_more_items(environment, user, all_items, time=None))))
            new_user_stats = []
            stats_to_delete_condition = Q()
            for concept in concepts:
                answer_aggregates = Answer.objects.filter(user=user, item__in=items[concept]).aggregate(
                    time_spent=Sum("response_time"),
                    sessions=Count("session", True),
                    time_first=Min("time"),
                    time_last=Max("time"),
                )
                # NOTE(review): assumes items[concept] is non-empty (division
                # below) and that the user has answers in the concept —
                # time_spent/time_first/time_last are None otherwise; confirm
                # callers guarantee this.
                stats = {
                    "answer_count": sum(answer_counts[i] for i in items[concept]),
                    "correct_answer_count": sum(correct_answer_counts[i] for i in items[concept]),
                    "item_count": len(items[concept]),
                    "practiced_items_count": sum([answer_counts[i] > 0 for i in items[concept]]),
                    "mastered_items_count": sum([predictions[i] >= mastery_threshold for i in items[concept]]),
                    "prediction": sum([predictions[i] for i in items[concept]]) / len(items[concept]),
                    "time_spent": answer_aggregates["time_spent"] / 1000,
                    "session_count": answer_aggregates["sessions"],
                    "time_first": answer_aggregates["time_first"].timestamp(),
                    "time_last": answer_aggregates["time_last"].timestamp(),
                }
                # replace all previous stats of this user for this concept
                stats_to_delete_condition |= Q(user=user, concept=concept)
                for stat_name, value in stats.items():
                    new_user_stats.append(UserStat(user_id=user, concept_id=concept, stat=stat_name, value=value))
            self.filter(stats_to_delete_condition).delete()
            self.bulk_create(new_user_stats)
Example #11
0
def enrich_json_objects_by_object_type(request, value):
    """
    Take the given value and start enrichment by object_type. The value is
    converted to JSON first when it is not a dict or a list already.

    Args:
        request (django.http.request.HttpRequest): request which is currently processed
        value (dict|list|django.db.models.Model):
            in case of django.db.models.Model object (or list of these
            objects), to_json method is invoked

    Returns:
        dict|list
    """
    time_start_globally = time()
    if isinstance(value, list):
        json = [x.to_json() if hasattr(x, "to_json") else x for x in value]
    else:
        if isinstance(value, dict):
            json = value
        else:
            json = value.to_json()
    # group the JSON objects by their 'object_type' attribute
    objects, nested = _collect_json_objects(json, by='object_type')
    for enricher_info in _get_OBJECT_TYPE_ENRICHER_ORDER():
        if len(enricher_info['object_types']) > 0:
            # enricher registered for specific object types only
            enricher_objects = flatten([objects.get(object_type, []) for object_type in enricher_info['object_types']])
            enricher_nested = any([nested.get(object_type, False) for object_type in enricher_info['object_types']])
        else:
            # enricher registered for all object types
            enricher_objects = flatten(objects.values())
            enricher_nested = any(nested.values())
        if len(enricher_objects) > 0:
            time_start = time()
            enricher_info['enricher'](request, enricher_objects, enricher_nested)
            LOGGER.debug('enrichment "{}" took {} seconds'.format(enricher_info['enricher_name'], time() - time_start))
            if not enricher_info['pure']:
                # if the enricher modified object types we must collect objects
                # again
                objects, nested = _collect_json_objects(json, by='object_type')
    LOGGER.debug('The whole enrichment of json objects by their object_type took {} seconds.'.format(time() - time_start_globally))
    return json
Example #12
0
def number_of_correct_answers(request, json_list, nested):
    """Enrich JSON objects with per-object counts of correct answers.

    Active only when the 'stats' GET parameter is present. Adds the
    'number_of_correct_answers' and 'practiced_correctly' keys.
    """
    if 'stats' not in request.GET:
        return
    user = get_user_id(request)
    item_ids = [json_object['item_id'] for json_object in json_list]
    leaves = models.Item.objects.get_leaves(item_ids, language=get_language(request))
    all_leaves = set(flatten(leaves.values()))
    correct_counts = _environment(request).number_of_correct_answers_more_items(
        user=user, items=all_leaves)
    for json_object in json_list:
        total = sum(correct_counts[leaf] for leaf in leaves[json_object['item_id']])
        json_object['number_of_correct_answers'] = total
        json_object['practiced_correctly'] = total > 0
    return json_list
Example #13
0
def avg_prediction(request, json_list, nested):
    """Enrich JSON objects with average prediction and mastery counts.

    Active only when the 'stats' GET parameter is present. Adds the
    'avg_predicton' (sic) and 'mastered' keys to each object.
    """
    if 'stats' not in request.GET:
        return
    object_item_ids = [x['item_id'] for x in json_list]
    leaves = models.Item.objects.get_leaves(object_item_ids, language=get_language(request))
    all_leaves = list(set(flatten(leaves.values())))
    user = get_user_id(request)
    time = models.get_time_for_knowledge_overview(request)
    predictions = dict(list(zip(all_leaves, _predictive_model().predict_more_items(
        _environment(request),
        user,
        all_leaves,
        time
    ))))
    mastery_threshold = get_mastery_trashold()
    for object_json in json_list:
        leaf_predictions = [predictions[leave] for leave in leaves[object_json['item_id']]]
        # NOTE(review): 'avg_predicton' is misspelled, but it is an output
        # API key — renaming it would break clients; confirm before fixing.
        object_json['avg_predicton'] = numpy.mean(leaf_predictions)
        # NOTE(review): strict '>' here, while other stats code in this file
        # compares with '>=' against the mastery threshold — confirm intent.
        object_json['mastered'] = sum([p > mastery_threshold for p in leaf_predictions])
Example #14
0
    def filter_all_reachable_leaves_many(self, identifier_filters, language):
        """
        Provides the same functionality as .. py:method:: ItemManager.filter_all_reachable_leaves(),
        but for more filters in the same time.

        Args:
            identifier_filters: list of identifier filters
            language (str): language used for further filtering (some objects
                for different languages share the same item

        Returns:
            list: list of list of item ids
        """
        # NOTE(review): this normalization mutates the caller's list in place
        for i, identifier_filter in enumerate(identifier_filters):
            if len(identifier_filter) == 1 and not isinstance(identifier_filter[0], list):
                identifier_filters[i] = [identifier_filter]
            if any([len(xs) == 1 and xs[0].startswith('-') for xs in identifier_filter]):
                raise Exception('Filter containing only one identifier with "-" prefix is not allowed.')
        # translate all identifiers (with any '-' prefix stripped) in one batch
        item_identifiers = [
            identifier[1:] if identifier.startswith('-') else identifier
            for identifier_filter in identifier_filters
            for identifier in set(flatten(identifier_filter))
        ]
        translated = self.translate_identifiers(item_identifiers, language)
        leaves = self.get_leaves(list(translated.values()))
        result = []
        for identifier_filter in identifier_filters:
            filter_result = set()
            # outer list = union of inner filters; inner list = intersection;
            # identifiers prefixed with '-' are subtracted from the result
            for inner_filter in identifier_filter:
                inner_result = None
                inner_neg_result = set()
                if len(inner_filter) == 0:
                    raise Exception('Empty nested filters are not allowed.')
                # NOTE(review): an inner filter with more than one identifier
                # that are all '-' prefixed leaves inner_result as None and
                # fails below — confirm such input is rejected upstream.
                for identifier in inner_filter:
                    if identifier.startswith('-'):
                        inner_neg_result |= set(leaves[translated[identifier[1:]]])
                    elif inner_result is None:
                        inner_result = set(leaves[translated[identifier]])
                    else:
                        inner_result &= set(leaves[translated[identifier]])
                filter_result |= inner_result - inner_neg_result
            result.append(sorted(list(filter_result)))
        return result
Example #15
0
 def handle(self, *args, **options):
     """Print the children graph of the given root items as JSON.

     Item ids are rendered as "object_type/identifier" strings; ids
     missing from the translation render as null.
     """
     if len(options['roots']) == 0:
         raise Exception('At least one root has to be specified.')
     translated_roots = Item.objects.translate_identifiers(
         options['roots'], options['lang'])
     graph = Item.objects.get_children_graph(list(
         translated_roots.values()),
                                             language=options['lang'])
     # item id -> "object_type/identifier"
     translated_items = {
         i: '{}/{}'.format(o['object_type'], o['identifier'])
         for i, o in Item.objects.translate_item_ids(
             flatten(graph.values()),
             language=options['lang'],
             is_nested=True).items()
     }
     translated_graph = {
         translated_items.get(u): [translated_items.get(v) for v in vs]
         for u, vs in graph.items()
     }
     print(json.dumps(translated_graph))
Example #16
0
def _get_OBJECT_TYPE_ENRICHER_ORDER():
    """
    Return enrichers sorted so that each one runs after its dependencies.

    The order is computed lazily (DFS over the dependency graph, roots
    visited by ascending 'priority') and cached in the module-level
    _OBJECT_TYPE_ENRICHER_ORDER variable; the lock guards the cache.
    """
    with _OBJECT_TYPE_ENRICHERS_LOCK:
        global _OBJECT_TYPE_ENRICHER_ORDER
        if _OBJECT_TYPE_ENRICHER_ORDER is None:
            global _OBJECT_TYPE_ENRICHERS
            visited = set()
            stack = set()
            order = []
            enrichers = _OBJECT_TYPE_ENRICHERS
            # enrichers referenced as a dependency of some other enricher
            refs = set(
                flatten([
                    enricher_info['dependencies']
                    for enricher_info in enrichers.values()
                ]))
            roots = set(enrichers.keys()) - refs

            def _visit(enricher_info):
                # post-order DFS: append a node after all its dependencies
                if enricher_info['enricher_name'] in visited:
                    return
                # NOTE(review): nodes are added to 'visited' before their
                # dependencies are processed, so a back-edge hits the
                # early-return above and this raise looks unreachable —
                # confirm cycle detection actually works.
                if enricher_info['enricher_name'] in stack:
                    raise Exception(
                        'There is a cycle in dependencies of enrichers.')
                stack.add(enricher_info['enricher_name'])
                visited.add(enricher_info['enricher_name'])
                for enricher_dep in sorted(
                        enricher_info['dependencies'],
                        key=lambda name: enrichers[name]['priority']):
                    _visit(enrichers[enricher_dep])
                stack.remove(enricher_info['enricher_name'])
                order.append(enricher_info)

            for enricher_name, enricher_info in sorted(
                    enrichers.items(), key=lambda x: x[1]['priority']):
                if enricher_name not in roots:
                    continue
                _visit(enricher_info)
            # NOTE(review): sorting 'order' by each element's own index is a
            # no-op; 'order' already has the final ordering.
            indexes = dict([(enricher_info['enricher_name'], i)
                            for (i, enricher_info) in enumerate(order)])
            _OBJECT_TYPE_ENRICHER_ORDER = sorted(
                order, key=lambda e: indexes[e['enricher_name']])
        return _OBJECT_TYPE_ENRICHER_ORDER
Example #17
0
 def _load_item_relations(self, data, db_objects, categories_json_key):
     """
     Build the item parent subgraph (categories, context, terms) from the
     loaded JSON objects and store it.

     Args:
         data (list): loaded JSON flashcard objects
         db_objects (dict): maps (identifier, lang) to the saved DB object
         categories_json_key (str): JSON key holding parent category ids
     """
     db_objects_processed = {}
     for (identifier, lang), db_object in db_objects.items():
         _, found_langs = db_objects_processed.get(identifier, (None, []))
         db_objects_processed[identifier] = db_object, found_langs + [lang]
     print("\nFilling item types")
     call_command('fill_item_types')
     print("\nBuilding dependencies")
     parent_subgraph = {}
     lang_intersect = None
     # integer division: 'every' should be an int; plain '/' under
     # Python 3 yields a float and breaks the progress bar cadence
     for json_object in progress.bar(data, every=max(1, len(data) // 100)):
         db_object, langs = db_objects_processed[json_object["id"]]
         # The language is not important here.
         lang_intersect = set(
             langs
         ) if lang_intersect is None else lang_intersect & set(langs)
         parent_items = parent_subgraph.get(db_object.item_id, set())
         for parent in json_object.get(categories_json_key, []):
             parent_items.add('proso_flashcards_category/{}'.format(parent))
         if 'context' in json_object:
             parent_items.add('proso_flashcards_context/{}'.format(
                 json_object['context']))
         if 'term' in json_object:
             parent_items.add('proso_flashcards_term/{}'.format(
                 json_object['term']))
         if 'term-secondary' in json_object:
             parent_items.add('proso_flashcards_term/{}'.format(
                 json_object['term-secondary']))
         parent_subgraph[db_object.item_id] = parent_items
     lang = lang_intersect.pop()
     translated = Item.objects.translate_identifiers(
         flatten(parent_subgraph.values()), lang)
     Item.objects.override_parent_subgraph({
         item: [translated[parent] for parent in parents]
         for item, parents in parent_subgraph.items()
     })
 def _load_item_relations(self, data, db_objects, categories_json_key):
     """
     Build the item parent subgraph (categories) from the loaded JSON
     objects and store it.

     Args:
         data (list): loaded JSON objects; each carries an "id" and
             localized "name-xx" keys
         db_objects (dict): maps (identifier, lang) to the saved DB object
         categories_json_key (str): JSON key holding parent category ids

     NOTE(review): assumes every JSON object has at least one "name-xx"
     key and that all objects share at least one common language — TODO
     confirm with the loader.
     """
     print("\nFilling item types")
     call_command('fill_item_types')
     print("\nBuilding dependencies")
     parent_subgraph = {}
     lang_intersect = None
     # integer division: 'every' should be an int; plain '/' under
     # Python 3 yields a float and breaks the progress bar cadence
     for json_object in progress.bar(data, every=max(1, len(data) // 100)):
         # The language is not important here.
         langs = [k[-2:] for k in json_object.keys() if re.match(r'^name-\w\w$', k)]
         lang_intersect = set(langs) if lang_intersect is None else lang_intersect & set(langs)
         lang = langs[0]
         db_object = db_objects[json_object["id"], lang]
         parent_items = parent_subgraph.get(db_object.item_id, set())
         for parent in json_object.get(categories_json_key, []):
             parent_items.add('proso_flashcards_category/{}'.format(parent))
         parent_subgraph[db_object.item_id] = parent_items
     lang = lang_intersect.pop()
     translated = Item.objects.translate_identifiers(
         flatten(parent_subgraph.values()), lang
     )
     Item.objects.override_parent_subgraph({
         item: [translated[parent] for parent in parents]
         for item, parents in parent_subgraph.items()
     })
Example #19
0
 def __search(item_ids):
     # Expand one level of the children graph: gather all children of the
     # given items, record childless ones as leaves, and return the rest
     # for the next expansion round.
     expanded = set()
     for item_id in item_ids:
         expanded.update(children.get(item_id, []))
     new_leaves = {item_id for item_id in expanded if item_id not in children}
     leaves.update(new_leaves)
     return expanded - new_leaves
 def recompute(self, info, options):
     """
     Replay the next batch of answers through the predictive model, flush
     the environment, and optionally activate the new environment info.

     Args:
         info: environment info object tracking load progress
         options (dict): uses 'batch_size' and 'finish'
     """
     print(' -- preparing phase')
     timer('recompute_prepare')
     environment = self.load_environment(info)
     users, items = self.load_user_and_item_ids(info, options['batch_size'])
     # prefetch reachable parents too; the model may read their state
     items += list(set(flatten(Item.objects.get_reachable_parents(items).values())))
     environment.prefetch(users, items)
     predictive_model = get_predictive_model(info.to_json())
     print(' -- preparing phase, time:', timer('recompute_prepare'), 'seconds')
     timer('recompute_model')
     print(' -- model phase')
     with closing(connection.cursor()) as cursor:
         # resume from the last processed answer (info.load_progress)
         cursor.execute(
             '''
             SELECT
                 id,
                 user_id,
                 item_id,
                 item_asked_id,
                 item_answered_id,
                 time,
                 response_time,
                 guess
             FROM proso_models_answer
             ORDER BY id
             OFFSET %s LIMIT %s
             ''', [info.load_progress, options['batch_size']])
         progress_bar = progress.bar(cursor, every=max(1, cursor.rowcount // 100), expected_size=cursor.rowcount)
         info.load_progress += cursor.rowcount
         # NOTE: the loop variable 'time' shadows the time() function here
         for (answer_id, user, item, asked, answered, time, response_time, guess) in progress_bar:
             predictive_model.predict_and_update(
                 environment,
                 user,
                 item,
                 asked == answered,
                 time,
                 item_answered=answered,
                 item_asked=asked,
                 guess=guess,
                 answer_id=answer_id,
                 response_time=response_time,
             )
             environment.process_answer(user, item, asked, answered, time, answer_id, response_time, guess)
     print(' -- model phase, time:', timer('recompute_model'), 'seconds')
     timer('recompute_flush')
     print(' -- flushing phase')
     environment.flush(clean=options['finish'])
     print(' -- flushing phase, time:', timer('recompute_flush'), 'seconds, total number of answers:', info.load_progress)
     if options['finish']:
         timer('recompute_finish')
         print(' -- finishing phase')
         try:
             # deactivate the previously active environment info (if any)
             previous_info = EnvironmentInfo.objects.get(status=EnvironmentInfo.STATUS_ACTIVE)
             previous_info.status = EnvironmentInfo.STATUS_DISABLED
             previous_info.save()
             cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
         except EnvironmentInfo.DoesNotExist:
             pass
         info.status = EnvironmentInfo.STATUS_ACTIVE
         print(' -- finishing phase, time:', timer('recompute_finish'), 'seconds')
     info.save()
Example #21
0
def user_stats(request):
    """
    Get user statistics for selected groups of items

    time:
      time in format '%Y-%m-%d_%H:%M:%S' used for practicing
    user:
      identifier of the user (only for stuff users)
    username:
      username of user (only for users with public profile)
    filters:                -- use this or body
      json as in BODY
    mastered:
      use model to compute number of mastered items - can be slowed
    language:
      language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    timer('user_stats')
    # filters may arrive either in the POST body or as a GET parameter;
    # the GET parameter wins when both are present
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None:
        # no filters given -> answer with the plain help page
        return render_json(request, {}, template='models_user_stats.html', help_text=user_stats.__doc__)
    environment = get_environment()
    if is_time_overridden(request):
        environment.shift_time(get_time(request))
    user_id = get_user_id(request)
    language = get_language(request)
    filter_names, filter_filters = list(zip(*sorted(data.items())))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(filter_filters, language)
    all_leaves = flatten(reachable_leaves)
    # map leaf item id -> answer counts for this user
    answers = dict(zip(all_leaves, environment.number_of_answers_more_items(all_leaves, user_id)))
    correct_answers = dict(zip(all_leaves, environment.number_of_correct_answers_more_items(all_leaves, user_id)))
    mastered_requested = bool(request.GET.get("mastered"))
    if mastered_requested:
        timer('user_stats_mastered')
        mastery_threshold = get_mastery_trashold()
        predictions = get_predictive_model().predict_more_items(environment, user_id, all_leaves, get_time(request))
        mastered = {leaf: pred >= mastery_threshold for leaf, pred in zip(all_leaves, predictions)}
        LOGGER.debug("user_stats - getting predictions for flashcards took %s seconds", (timer('user_stats_mastered')))
    response = {}
    for identifier, leaves in zip(filter_names, reachable_leaves):
        entry = {"filter": data[identifier]}
        if not leaves:
            entry["number_of_flashcards"] = 0
        else:
            entry["number_of_flashcards"] = len(leaves)
            entry["number_of_practiced_flashcards"] = sum(answers[i] > 0 for i in leaves)
            entry["number_of_answers"] = sum(answers[i] for i in leaves)
            entry["number_of_correct_answers"] = sum(correct_answers[i] for i in leaves)
            if mastered_requested:
                entry["number_of_mastered_flashcards"] = sum(mastered[i] for i in leaves)
        response[identifier] = entry
    return render_json(request, response, template='models_user_stats.html', help_text=user_stats.__doc__)
Example #22
0
    def recalculate_concepts(self, concepts, lang=None):
        """
        Recalculate the given concepts for the given users.

        Args:
            concepts (dict): user id (int) -> set of concept ids to recalculate
            lang (Optional[str]): language used to get items in all concepts
                (cached). Defaults to None; in that case items are fetched
                only for the concepts actually used.
        """
        if len(concepts) == 0:
            return

        if lang is None:
            items = Concept.objects.get_concept_item_mapping(
                concepts=Concept.objects.filter(
                    pk__in=set(flatten(concepts.values()))))
        else:
            items = Concept.objects.get_concept_item_mapping(lang=lang)

        environment = get_environment()
        mastery_threshold = get_mastery_trashold()
        # FIX: the inner loop variable used to be named ``concepts``, which
        # shadowed and rebound the parameter while iterating it; renamed to
        # ``user_concepts`` to keep the parameter intact and the code readable.
        for user, user_concepts in concepts.items():
            all_items = list(set(flatten([items[c] for c in user_concepts])))
            # item id -> count / prediction maps for this user
            answer_counts = dict(zip(
                all_items,
                environment.number_of_answers_more_items(all_items, user)))
            correct_answer_counts = dict(zip(
                all_items,
                environment.number_of_correct_answers_more_items(
                    all_items, user)))
            predictions = dict(zip(
                all_items,
                get_predictive_model().predict_more_items(
                    environment, user, all_items, time=None)))
            new_user_stats = []
            stats_to_delete_condition = Q()
            for concept in user_concepts:
                concept_items = items[concept]
                answer_aggregates = Answer.objects.filter(
                    user=user, item__in=concept_items).aggregate(
                        time_spent=Sum("response_time"),
                        sessions=Count("session", True),
                        time_first=Min("time"),
                        time_last=Max("time"),
                    )
                stats = {
                    "answer_count":
                    sum(answer_counts[i] for i in concept_items),
                    "correct_answer_count":
                    sum(correct_answer_counts[i] for i in concept_items),
                    "item_count":
                    len(concept_items),
                    "practiced_items_count":
                    sum(answer_counts[i] > 0 for i in concept_items),
                    "mastered_items_count":
                    sum(predictions[i] >= mastery_threshold
                        for i in concept_items),
                    "prediction":
                    sum(predictions[i]
                        for i in concept_items) / len(concept_items),
                    # NOTE(review): the aggregates below are None when the
                    # user has no answers for the concept's items; presumably
                    # callers only pass concepts with answers -- verify.
                    "time_spent":
                    answer_aggregates["time_spent"] / 1000,
                    "session_count":
                    answer_aggregates["sessions"],
                    "time_first":
                    answer_aggregates["time_first"].timestamp(),
                    "time_last":
                    answer_aggregates["time_last"].timestamp(),
                }
                # replace all previous stats of this user/concept pair
                stats_to_delete_condition |= Q(user=user, concept=concept)
                for stat_name, value in stats.items():
                    new_user_stats.append(
                        UserStat(user_id=user,
                                 concept_id=concept,
                                 stat=stat_name,
                                 value=value))
            self.filter(stats_to_delete_condition).delete()
            self.bulk_create(new_user_stats)
Example #23
0
def user_stats(request):
    """
    Get user statistics for selected groups of items

    time:
      time in format '%Y-%m-%d_%H:%M:%S' used for practicing
    user:
      identifier of the user (only for stuff users)
    username:
      username of user (only for users with public profile)
    filters:                -- use this or body
      json as in BODY
    mastered:
      use model to compute number of mastered items - can be slowed
    language:
      language of the items

    BODY
      json in following format:
      {
        "#identifier": []         -- custom identifier (str) and filter
        ...
      }
    """
    timer('user_stats')
    # filters may arrive either in the POST body or as a GET parameter;
    # the GET parameter wins when both are present
    data = None
    if request.method == "POST":
        data = json.loads(request.body.decode("utf-8"))["filters"]
    if "filters" in request.GET:
        data = load_query_json(request.GET, "filters")
    if data is None:
        # no filters given -> answer with the plain help page
        return render_json(request, {},
                           template='models_user_stats.html',
                           help_text=user_stats.__doc__)
    environment = get_environment()
    if is_time_overridden(request):
        environment.shift_time(get_time(request))
    user_id = get_user_id(request)
    language = get_language(request)
    filter_names, filter_filters = list(zip(*sorted(data.items())))
    reachable_leaves = Item.objects.filter_all_reachable_leaves_many(
        filter_filters, language)
    all_leaves = sorted(list(set(flatten(reachable_leaves))))
    # NOTE(review): these results are later indexed by item id
    # (``answers[i]``); this assumes the environment helpers return a
    # mapping keyed by item id rather than a plain list -- verify.
    answers = environment.number_of_answers_more_items(all_leaves, user_id)
    correct_answers = environment.number_of_correct_answers_more_items(
        all_leaves, user_id)
    mastered_requested = bool(request.GET.get("mastered"))
    if mastered_requested:
        timer('user_stats_mastered')
        mastery_threshold = get_mastery_trashold()
        predictions = Item.objects.predict_for_overview(
            environment, user_id, all_leaves)
        mastered = {leaf: pred >= mastery_threshold
                    for leaf, pred in zip(all_leaves, predictions)}
        LOGGER.debug(
            "user_stats - getting predictions for items took %s seconds",
            (timer('user_stats_mastered')))
    response = {}
    for identifier, leaves in zip(filter_names, reachable_leaves):
        entry = {"filter": data[identifier]}
        if not leaves:
            entry["number_of_items"] = 0
        else:
            entry["number_of_items"] = len(leaves)
            entry["number_of_practiced_items"] = sum(
                answers[i] > 0 for i in leaves)
            entry["number_of_answers"] = sum(answers[i] for i in leaves)
            entry["number_of_correct_answers"] = sum(
                correct_answers[i] for i in leaves)
            if mastered_requested:
                entry["number_of_mastered_items"] = sum(
                    mastered[i] for i in leaves)
        response[identifier] = entry
    return render_json(request,
                       response,
                       template='models_user_stats.html',
                       help_text=user_stats.__doc__)
Example #24
0
 def recompute(self, info, options):
     """
     Replay a batch of stored answers through the predictive model, updating
     the environment, and optionally finish by activating this environment.

     Args:
         info: environment info record; ``load_progress`` tracks how many
             answers were already processed and ``status`` is updated when
             finishing.
         options (dict): uses ``batch_size`` (answers processed per run) and
             ``finish`` (clean-flush and activate this environment, disabling
             the previously active one).
     """
     print(' -- preparing phase')
     timer('recompute_prepare')
     environment = self.load_environment(info)
     users, items = self.load_user_and_item_ids(info, options['batch_size'])
     # also prefetch all reachable parent items, not only the answered leaves
     items += list(
         set(flatten(Item.objects.get_reachable_parents(items).values())))
     environment.prefetch(users, items)
     predictive_model = get_predictive_model(info.to_json())
     print(' -- preparing phase, time:', timer('recompute_prepare'),
           'seconds')
     timer('recompute_model')
     print(' -- model phase')
     with closing(connection.cursor()) as cursor:
         # fetch the next batch of answers in insertion (id) order, starting
         # where the previous run stopped (info.load_progress)
         cursor.execute(
             '''
             SELECT
                 id,
                 user_id,
                 item_id,
                 item_asked_id,
                 item_answered_id,
                 time,
                 response_time,
                 guess
             FROM proso_models_answer
             ORDER BY id
             OFFSET %s LIMIT %s
             ''', [info.load_progress, options['batch_size']])
         progress_bar = progress.bar(cursor,
                                     every=max(1, cursor.rowcount // 100),
                                     expected_size=cursor.rowcount)
         info.load_progress += cursor.rowcount
         for (answer_id, user, item, asked, answered, time, response_time,
              guess) in progress_bar:
             # note: the loop variable ``time`` shadows any module-level
             # ``time`` name within this loop body
             predictive_model.predict_and_update(
                 environment,
                 user,
                 item,
                 asked == answered,
                 time,
                 item_answered=answered,
                 item_asked=asked,
                 guess=guess,
                 answer_id=answer_id,
                 response_time=response_time,
             )
             environment.process_answer(user, item, asked, answered, time,
                                        answer_id, response_time, guess)
     print(' -- model phase, time:', timer('recompute_model'), 'seconds')
     timer('recompute_flush')
     print(' -- flushing phase')
     environment.flush(clean=options['finish'])
     print(' -- flushing phase, time:', timer('recompute_flush'),
           'seconds, total number of answers:', info.load_progress)
     if options['finish']:
         timer('recompute_finish')
         print(' -- finishing phase')
         try:
             # disable the previously active environment and drop its cache
             previous_info = EnvironmentInfo.objects.get(
                 status=EnvironmentInfo.STATUS_ACTIVE)
             previous_info.status = EnvironmentInfo.STATUS_DISABLED
             previous_info.save()
             cache.delete(ENVIRONMENT_INFO_CACHE_KEY)
         except EnvironmentInfo.DoesNotExist:
             # no active environment yet -- nothing to disable
             pass
         info.status = EnvironmentInfo.STATUS_ACTIVE
         print(' -- finishing phase, time:', timer('recompute_finish'),
               'seconds')
     info.save()
Example #25
0
 def handle_dry(self, options):
     """
     Dry-run recomputation: replay answers through the predictive model and
     write a prediction-accuracy report, without marking anything active.

     Args:
         options (dict): uses ``initial``, ``config_name``, ``batch_size``
             and ``limit`` (optional cap on the number of replayed answers).
     """
     info = self.load_environment_info(options['initial'],
                                       options['config_name'], True)
     # FIX: removed a dead ``environment = InMemoryEnvironment(...)``
     # assignment that was immediately overwritten by the line below.
     # NOTE(review): if the dry run was *meant* to use a fresh in-memory
     # environment, then the overwrite (not the dead line) was the actual
     # bug -- confirm against the command's intent.
     environment = self.load_environment(info)
     users, items = self.load_user_and_item_ids(info, options['batch_size'])
     # also prefetch all reachable parent items, not only the answered leaves
     items += list(
         set(flatten(Item.objects.get_reachable_parents(items).values())))
     environment.prefetch(users, items)
     predictive_model = get_predictive_model(info.to_json())
     with closing(connection.cursor()) as cursor:
         cursor.execute('SELECT COUNT(*) FROM proso_models_answer')
         answers_total = cursor.fetchone()[0]
         if options['limit'] is not None:
             answers_total = min(answers_total, options['limit'])
         print('total:', answers_total)
         processed = 0
         prediction = numpy.empty(answers_total)
         correct = numpy.empty(answers_total)
         # replay answers batch by batch, in insertion (id) order
         while processed < answers_total:
             cursor.execute(
                 '''
                 SELECT
                     id,
                     user_id,
                     item_id,
                     item_asked_id,
                     item_answered_id,
                     time,
                     response_time,
                     guess
                 FROM proso_models_answer
                 ORDER BY id
                 OFFSET %s LIMIT %s
                 ''', [processed, options['batch_size']])
             for (answer_id, user, item, asked, answered, time,
                  response_time, guess) in cursor:
                 correct[processed] = asked == answered
                 prediction[
                     processed] = predictive_model.predict_and_update(
                         environment,
                         user,
                         item,
                         asked == answered,
                         time,
                         item_answered=answered,
                         item_asked=asked,
                         guess=guess,
                         answer_id=answer_id,
                         response_time=response_time,
                     )
                 environment.process_answer(user, item, asked, answered,
                                            time, answer_id, response_time,
                                            guess)
                 processed += 1
                 if processed >= answers_total:
                     break
             print('processed:', processed)
     filename = settings.DATA_DIR + '/recompute_model_report_{}.json'.format(
         predictive_model.__class__.__name__)
     model_report = report(prediction, correct)
     with open(filename, 'w') as outfile:
         json.dump(model_report, outfile)
     print('Saving report to:', filename)
     brier_graphs(model_report['brier'], predictive_model)