Beispiel #1
0
 def _reachable_graph(self, item_ids, neighbors):
     """
     Build the dependency graph reachable from the given items.

     The graph is computed by ``fixed_point``, seeded with
     ``{None: item_ids}`` and given ``neighbors`` as its ``f`` argument.
     Entries whose dependency collection is empty are dropped from the
     result.

     Args:
         item_ids: starting item ids, stored under the ``None`` key
         neighbors (callable): passed to ``fixed_point`` as ``f``

     Returns:
         dict: node -> non-empty collection of dependencies
     """
     def _is_exhausted(xs):
         return len(xs) == 0

     def _without_known(xs, ys):
         # keep only the entries whose key has not been seen yet
         return {node: deps for (node, deps) in xs.items() if node not in ys}

     def _merged(xs, ys):
         # on a key clash the entry from ys wins
         return dict(list(xs.items()) + list(ys.items()))

     closure = fixed_point(
         is_zero=_is_exhausted,
         minus=_without_known,
         plus=_merged,
         f=neighbors,
         x={None: item_ids},
     )
     reachable = {}
     for node, deps in closure.items():
         if len(deps) > 0:
             reachable[node] = deps
     return reachable
Beispiel #2
0
 def _reachable_items(self, graph):
     """
     Compute, for every root listed under ``graph[None]``, the sorted list of
     items reachable from it.

     Reachability is computed by ``fixed_point`` over sets of nodes, starting
     from the root itself; the root is removed from its own result.

     Args:
         graph (dict): node -> iterable of directly reachable nodes; the
             ``None`` key holds the roots

     Returns:
         dict: root -> sorted list of reachable nodes (without the root)
     """
     def _successors(xs):
         # union of the direct successors of all the given nodes
         found = set()
         for node in xs:
             found = found | set(graph.get(node, []))
         return found

     def _is_empty(xs):
         return len(xs) == 0

     def _difference(xs, ys):
         return xs - ys

     def _union(xs, ys):
         return xs | ys

     reachable = {}
     for root in graph[None]:
         closure = fixed_point(
             is_zero=_is_empty,
             minus=_difference,
             plus=_union,
             f=_successors,
             x={root},
         )
         reachable[root] = sorted(closure - {root})
     return reachable
Beispiel #3
0
    def filter_all_reachable_leaves_many(self, identifier_filters, language):
        """
        Provide the same functionality as ``filter_all_reachable_leaves()``,
        but for several filters at once.

        Every filter is a list of inner filters, and every inner filter is a
        list of identifiers. Within one inner filter the leaves of the plain
        identifiers are intersected and the leaves of identifiers prefixed
        with "-" are subtracted; the results of all inner filters of one
        filter are then united. A flat filter holding exactly one identifier
        is accepted as a shorthand and is wrapped into a nested filter in
        place, i.e. the passed list is modified.

        Args:
            identifier_filters: list of identifier filters
            language (str): language used for further filtering (some objects
                for different languages share the same item)

        Returns:
            list: list of list of item ids (each inner list sorted)

        Raises:
            Exception: if an inner filter is empty or contains only
                identifiers with the "-" prefix
        """
        for i, identifier_filter in enumerate(identifier_filters):
            if len(identifier_filter) == 1 and not isinstance(identifier_filter[0], list):
                identifier_filter = [identifier_filter]
                identifier_filters[i] = identifier_filter
            # Validate the normalized filter; checking the flat form would
            # inspect characters of the identifier instead of identifiers.
            if any(len(xs) == 1 and xs[0].startswith('-') for xs in identifier_filter):
                raise Exception('Filter containing only one identifier with "-" prefix is not allowed.')
        item_identifiers = [
            identifier[1:] if identifier.startswith('-') else identifier
            for identifier_filter in identifier_filters
            for identifier in set(flatten(identifier_filter))
        ]
        translated = self.translate_identifiers(item_identifiers, language)
        leaves = self.get_leaves(list(translated.values()))
        result = []
        for identifier_filter in identifier_filters:
            filter_result = set()
            for inner_filter in identifier_filter:
                if len(inner_filter) == 0:
                    raise Exception('Empty nested filters are not allowed.')
                inner_result = None
                inner_neg_result = set()
                for identifier in inner_filter:
                    if identifier.startswith('-'):
                        inner_neg_result |= set(leaves[translated[identifier[1:]]])
                    elif inner_result is None:
                        inner_result = set(leaves[translated[identifier]])
                    else:
                        inner_result &= set(leaves[translated[identifier]])
                if inner_result is None:
                    # No positive identifier means there is nothing to
                    # subtract from; fail clearly instead of on None - set().
                    raise Exception('Filter containing only identifiers with "-" prefix is not allowed.')
                filter_result |= inner_result - inner_neg_result
            result.append(sorted(filter_result))
        return result
Beispiel #4
0
 def to_json(self, nested=False):
     """
     Serialize this object to a JSON-compatible dict.

     Args:
         nested (bool): accepted but not used by this implementation

     Returns:
         dict: id, object type, status label looked up in
             ``EnvironmentInfo.STATUS``, revision, formatted ``updated`` and
             ``created`` timestamps, and the parsed config content
     """
     timestamp_format = '%Y-%m-%d %H:%M:%S'
     serialized = {'id': self.id, 'object_type': 'environment_info'}
     status_labels = dict(list(EnvironmentInfo.STATUS))
     serialized['status'] = status_labels[self.status]
     serialized['revision'] = self.revision
     serialized['updated'] = self.updated.strftime(timestamp_format)
     serialized['created'] = self.created.strftime(timestamp_format)
     serialized['config'] = json.loads(self.config.content)
     return serialized
Beispiel #5
0
    def override_children_subgraph(self, children_subgraph, invisible_edges=None):
        """
        Get all items with outgoing edges from the given subgraph, drop all
        their child relations, and then add children according to the given
        subgraph.

        Existing relations that are still wanted are kept (their ``visible``
        flag is updated when it differs), missing relations are created and
        relations no longer present in the subgraph are deleted. Everything
        runs inside one database transaction.

        Args:
            children_subgraph (dict): item id -> list of children (item ids)
            invisible_edges (list|set): set of (from, to) tuples specifying
                invisible edges
        """
        with transaction.atomic():
            if invisible_edges is None:
                invisible_edges = set()
            parents = list(children_subgraph.keys())
            all_old_relations = dict(proso.list.group_by(
                list(ItemRelation.objects.filter(parent_id__in=parents)),
                by=lambda relation: relation.parent_id
            ))
            to_delete = set()
            for parent_id, children in children_subgraph.items():
                old_relations = {
                    relation.child_id: relation
                    for relation in all_old_relations.get(parent_id, [])
                }
                for child_id in children:
                    # Computed up front: writing "a != b not in c" inline is a
                    # chained comparison, i.e. "(a != b) and (b not in c)".
                    visible = (parent_id, child_id) not in invisible_edges
                    if child_id not in old_relations:
                        ItemRelation.objects.create(
                            parent_id=parent_id,
                            child_id=child_id,
                            visible=visible
                        )
                        continue
                    relation = old_relations[child_id]
                    if relation.visible != visible:
                        relation.visible = visible
                        relation.save()
                # relations whose child is no longer listed are removed in bulk
                to_delete |= {
                    old_relations[child_id].pk
                    for child_id in set(old_relations.keys()) - set(children)
                }
            ItemRelation.objects.filter(pk__in=to_delete).delete()
Beispiel #6
0
    def translate_identifiers(self, identifiers, language):
        """
        Translate a list of identifiers to item ids. Identifier is a string of
        the following form:

        <model_prefix>/<model_identifier>

        where <model_prefix> is any suffix of database table of the given model
        which uniquely specifies the table, and <model_identifier> is
        identifier of the object.

        Args:
            identifiers (list[str]): list of identifiers
            language (str): language used for further filtering (some objects
                for different languages share the same item)

        Returns:
            dict: identifier -> item id

        Raises:
            Exception: if at least one identifier can not be translated
        """
        result = {}
        identifiers = set(identifiers)
        item_types = ItemType.objects.get_all_types()
        identifiers_by_type = proso.list.group_by(
            identifiers,
            by=lambda identifier: self.get_item_type_id_from_identifier(identifier, item_types)
        )
        for item_type_id, type_identifiers in identifiers_by_type.items():
            # model identifier (the part after the prefix) -> full identifier
            to_find = {}
            for identifier in type_identifiers:
                identifier_split = identifier.split('/')
                to_find[identifier_split[1]] = identifier
            kwargs = {'identifier__in': list(to_find.keys())}
            # reuse the types fetched above instead of asking for them again
            item_type = item_types[item_type_id]
            model = ItemType.objects.get_model(item_type_id)
            if 'language' in item_type:
                # the type names the field which holds the object's language
                kwargs[item_type['language']] = language
            found = model.objects.filter(**kwargs).values_list('identifier', item_type['foreign_key'])
            for identifier, item_id in found:
                result[to_find[identifier]] = item_id
        if len(result) != len(identifiers):
            raise Exception("Can't translate the following identifiers: {}".format(identifiers - set(result.keys())))
        return result
Beispiel #7
0
def learning_curve(length, context=None, users=None, user_length=None, number_of_users=1000):
    """
    Compute a learning curve from answers stored in the database.

    Only answers whose meta information content contains
    ``random_without_options`` are used. For every (context, user) pair with
    enough answers, the answers are ordered by id and the n-th value records
    whether the asked item equals the answered item. The curve holds, for
    every position, the confidence mean over all such sequences.

    Args:
        length (int): number of positions of the curve
        context: optional context id restricting the answers
        users: optional collection of user ids restricting the answers; an
            empty collection yields the empty curve
        user_length (int): minimal number of answers a (context, user) pair
            must have to be included; defaults to ``length``
        number_of_users (int): maximal number of randomly chosen
            (context, user) groups taken into account

    Returns:
        dict: learning curve with the number of users, the number of data
            points, and the list of success values
    """
    if user_length is None:
        user_length = length
    with closing(connection.cursor()) as cursor:
        cursor.execute("SELECT id FROM proso_models_answermeta WHERE content LIKE '%%random_without_options%%'")
        meta_ids = [str(x[0]) for x in cursor.fetchall()]
    # NOTE(review): this result uses the key 'number_of_data_points' while the
    # non-empty result below uses 'number_of_datapoints' -- confirm which one
    # the consumers expect before unifying them.
    EMPTY_LEARNING_CURVE = {
        'number_of_users': 0,
        'number_of_data_points': 0,
        'success': [],
        'object_type': 'learning_curve',
    }
    if len(meta_ids) == 0:
        return EMPTY_LEARNING_CURVE
    if users is not None and len(users) == 0:
        # An empty user filter would be rendered as "user_id IN ()", which is
        # not valid SQL on every backend; no users means no data.
        return EMPTY_LEARNING_CURVE

    def _get_where(context, users, meta_ids):
        # meta_ids are stringified ids read from the database above, so they
        # are inlined; everything else is passed as a query parameter.
        _where = ['metainfo_id IN ({})'.format(','.join(meta_ids))]
        _where_params = []
        if context is not None:
            _where.append('context_id = %s')
            _where_params.append(context)
        if users is not None:
            _where.append('user_id IN ({})'.format(','.join(['%s' for _ in users])))
            _where_params.extend(users)
        return _where, _where_params

    # choose a random sample of users having enough answers
    with closing(connection.cursor()) as cursor:
        where, where_params = _get_where(context, users, meta_ids)
        cursor.execute(
            '''
            SELECT
                user_id
            FROM proso_models_answer
            WHERE ''' + ' AND '.join(where) + '''
            GROUP BY context_id, user_id
            HAVING COUNT(id) >= %s
            ORDER BY RANDOM()
            LIMIT %s
            ''', where_params + [user_length, number_of_users])
        valid_users = list(set([x[0] for x in cursor.fetchall()]))
    if len(valid_users) == 0:
        return EMPTY_LEARNING_CURVE
    with closing(connection.cursor()) as cursor:
        where, where_params = _get_where(context, valid_users, meta_ids)
        cursor.execute(
            '''
            SELECT
                context_id,
                user_id,
                item_asked_id = COALESCE(item_answered_id, -1)
            FROM proso_models_answer
            WHERE ''' + ' AND '.join(where) + '''
            ORDER BY id
            ''', where_params)
        # context id -> user id -> correctness flags ordered by answer id
        context_answers = defaultdict(lambda: defaultdict(list))
        for row in cursor:
            context_answers[row[0]][row[1]].append(row[2])
        # cut every sequence to `length` and pad the shorter ones with None,
        # so that all sequences can be zipped position by position
        user_answers = [
            answers[:length] + [None] * (length - len(answers))
            for answers_by_user in context_answers.values()
            for answers in answers_by_user.values()
            if len(answers) >= user_length
        ]

        def _mean_with_confidence(xs):
            # padding values carry no information and are skipped
            return confidence_value_to_json(binomial_confidence_mean([x for x in xs if x is not None]))

        return {
            'number_of_users': len(valid_users),
            'number_of_datapoints': len(user_answers),
            'success': list(map(_mean_with_confidence, list(zip(*user_answers)))),
            'object_type': 'learning_curve',
        }