def _reachable_graph(self, item_ids, neighbors):
    """Build the dependency graph reachable from the given items.

    Starting from a virtual ``None`` root pointing at ``item_ids``, the
    frontier is repeatedly expanded by ``neighbors`` via the generic
    ``fixed_point`` helper; nodes without dependencies are dropped from
    the final mapping.
    """
    closure = fixed_point(
        is_zero=lambda xs: len(xs) == 0,
        minus=lambda xs, ys: {x: vs for (x, vs) in xs.items() if x not in ys},
        plus=lambda xs, ys: dict(list(xs.items()) + list(ys.items())),
        f=neighbors,
        x={None: item_ids},
    )
    return {node: deps for node, deps in closure.items() if len(deps) > 0}
def _reachable_items(self, graph):
    """For each root item in ``graph[None]``, compute the sorted list of
    item ids transitively reachable from it (the root itself excluded)."""
    def _successors(frontier):
        # Union of all direct children of the current frontier.
        return reduce(lambda acc, more: acc | more, [set(graph.get(node, [])) for node in frontier], set())

    result = {}
    for root in graph[None]:
        closure = fixed_point(
            is_zero=lambda xs: len(xs) == 0,
            minus=lambda xs, ys: xs - ys,
            plus=lambda xs, ys: xs | ys,
            f=_successors,
            x={root},
        )
        result[root] = sorted(closure - {root})
    return result
def filter_all_reachable_leaves_many(self, identifier_filters, language):
    """
    Provides the same functionality as
    .. py:method:: ItemManager.filter_all_reachable_leaves(),
    but for more filters in the same time.

    Args:
        identifier_filters: list of identifier filters
        language (str): language used for further filtering (some objects
            for different languages share the same item)

    Returns:
        list: list of list of item ids

    Raises:
        Exception: when a nested filter is empty, or when a nested filter
            contains only identifiers with the "-" prefix (nothing positive
            to intersect against).
    """
    for i, identifier_filter in enumerate(identifier_filters):
        if len(identifier_filter) == 1 and not isinstance(identifier_filter[0], list):
            # Promote a flat single filter to the nested form.
            identifier_filters[i] = [identifier_filter]
        # Validate the NORMALIZED filter; checking the raw value missed
        # flat filters such as ['-foo/bar'].
        if any(len(xs) == 1 and xs[0].startswith('-') for xs in identifier_filters[i]):
            raise Exception('Filter containing only one identifier with "-" prefix is not allowed.')
    item_identifiers = [
        identifier[1:] if identifier.startswith('-') else identifier
        for identifier_filter in identifier_filters
        for identifier in set(flatten(identifier_filter))
    ]
    translated = self.translate_identifiers(item_identifiers, language)
    leaves = self.get_leaves(list(translated.values()))
    result = []
    for identifier_filter in identifier_filters:
        filter_result = set()
        for inner_filter in identifier_filter:
            inner_result = None
            inner_neg_result = set()
            if len(inner_filter) == 0:
                raise Exception('Empty nested filters are not allowed.')
            for identifier in inner_filter:
                if identifier.startswith('-'):
                    # Negative identifier: subtract its leaves at the end.
                    inner_neg_result |= set(leaves[translated[identifier[1:]]])
                elif inner_result is None:
                    inner_result = set(leaves[translated[identifier]])
                else:
                    # Positive identifiers are intersected together.
                    inner_result &= set(leaves[translated[identifier]])
            if inner_result is None:
                # Previously crashed with TypeError (None - set); fail clearly.
                raise Exception('Filter containing only identifiers with "-" prefix is not allowed.')
            filter_result |= inner_result - inner_neg_result
        result.append(sorted(filter_result))
    return result
def to_json(self, nested=False):
    """Serialize this environment info record to a JSON-friendly dict.

    The ``nested`` flag is accepted for interface compatibility with other
    ``to_json`` methods and is not used here.
    """
    status_names = dict(list(EnvironmentInfo.STATUS))
    return {
        'id': self.id,
        'object_type': 'environment_info',
        'status': status_names[self.status],
        'revision': self.revision,
        'updated': self.updated.strftime('%Y-%m-%d %H:%M:%S'),
        'created': self.created.strftime('%Y-%m-%d %H:%M:%S'),
        'config': json.loads(self.config.content),
    }
def override_children_subgraph(self, children_subgraph, invisible_edges=None):
    """
    Get all items with outcoming edges from the given subgraph, drop all
    their child relations, and then add children according to the given
    subgraph.

    Args:
        children_subgraph (dict): item id -> list of children (item ids)
        invisible_edges (list|set): set of (from, to) tuples specifying
            invisible edges
    """
    with transaction.atomic():
        if invisible_edges is None:
            invisible_edges = set()
        parents = list(children_subgraph.keys())
        all_old_relations = dict(proso.list.group_by(
            list(ItemRelation.objects.filter(parent_id__in=parents)),
            by=lambda relation: relation.parent_id
        ))
        to_delete = set()
        for parent_id, children in children_subgraph.items():
            old_relations = {
                relation.child_id: relation
                for relation in all_old_relations.get(parent_id, [])
            }
            for child_id in children:
                visible = (parent_id, child_id) not in invisible_edges
                if child_id not in old_relations:
                    ItemRelation.objects.create(
                        parent_id=parent_id,
                        child_id=child_id,
                        visible=visible
                    )
                # BUG FIX: the original condition
                #   old_relations[child_id].visible != (parent_id, child_id) not in invisible_edges
                # was a chained comparison: (visible != tuple) and (tuple not in invisible_edges).
                # The first half is always True, so relations never flipped to
                # invisible and visible ones were re-saved on every call.
                elif old_relations[child_id].visible != visible:
                    old_relations[child_id].visible = visible
                    old_relations[child_id].save()
            # Relations to children no longer present in the subgraph are dropped.
            to_delete |= {old_relations[child_id].pk for child_id in set(old_relations.keys()) - set(children)}
        ItemRelation.objects.filter(pk__in=to_delete).delete()
def translate_identifiers(self, identifiers, language):
    """
    Translate a list of identifiers to item ids. Identifier is a string of
    the following form:

        <model_prefix>/<model_identifier>

    where <model_prefix> is any suffix of database table of the given model
    which uniquely specifies the table, and <model_identifier> is
    identifier of the object.

    Args:
        identifiers (list[str]): list of identifiers
        language (str): language used for further filtering (some objects
            for different languages share the same item)

    Returns:
        dict: identifier -> item id

    Raises:
        Exception: when some of the identifiers cannot be translated
    """
    result = {}
    identifiers = set(identifiers)
    item_types = ItemType.objects.get_all_types()
    for item_type_id, type_identifiers in proso.list.group_by(identifiers, by=lambda identifier: self.get_item_type_id_from_identifier(identifier, item_types)).items():
        to_find = {}
        for identifier in type_identifiers:
            identifier_split = identifier.split('/')
            to_find[identifier_split[1]] = identifier
        kwargs = {'identifier__in': list(to_find.keys())}
        # Reuse the already fetched mapping instead of calling
        # ItemType.objects.get_all_types() again inside the loop.
        item_type = item_types[item_type_id]
        model = ItemType.objects.get_model(item_type_id)
        if 'language' in item_type:
            kwargs[item_type['language']] = language
        for identifier, item_id in model.objects.filter(**kwargs).values_list('identifier', item_type['foreign_key']):
            result[to_find[identifier]] = item_id
    if len(result) != len(identifiers):
        raise Exception("Can't translate the following identifiers: {}".format(identifiers - set(result)))
    return result
def learning_curve(length, context=None, users=None, user_length=None, number_of_users=1000):
    """Compute an averaged learning curve from answers whose metainfo marks
    them as random (without options).

    Args:
        length (int): number of reference attempts forming the curve
        context (int): optional context id restricting the answers
        users (list): optional list of user ids restricting the answers
        user_length (int): minimal number of answers a user must have in a
            context to be counted; defaults to ``length``
        number_of_users (int): maximal number of randomly sampled users

    Returns:
        dict: learning-curve JSON with a confidence value per attempt
    """
    if user_length is None:
        user_length = length
    with closing(connection.cursor()) as cursor:
        cursor.execute("SELECT id FROM proso_models_answermeta WHERE content LIKE '%%random_without_options%%'")
        meta_ids = [str(x[0]) for x in cursor.fetchall()]
    EMPTY_LEARNING_CURVE = {
        'number_of_users': 0,
        'number_of_data_points': 0,
        'success': [],
        'object_type': 'learning_curve',
    }
    if len(meta_ids) == 0:
        return EMPTY_LEARNING_CURVE

    def _get_where(context, users, meta_ids):
        # Assemble shared WHERE clauses and bound parameters for both queries.
        _where = ['metainfo_id IN ({})'.format(','.join(meta_ids))]
        _where_params = []
        if context is not None:
            _where.append('context_id = %s')
            _where_params.append(context)
        if users is not None:
            _where.append('user_id IN ({})'.format(','.join(['%s' for _ in users])))
            _where_params += users
        return _where, _where_params

    with closing(connection.cursor()) as cursor:
        where, where_params = _get_where(context, users, meta_ids)
        cursor.execute(
            '''
            SELECT user_id
            FROM proso_models_answer
            WHERE ''' + ' AND '.join(where) + '''
            GROUP BY context_id, user_id
            HAVING COUNT(id) >= %s
            ORDER BY RANDOM()
            LIMIT %s
            ''', where_params + [user_length, number_of_users])
        valid_users = list(set([x[0] for x in cursor.fetchall()]))
    if len(valid_users) == 0:
        return EMPTY_LEARNING_CURVE
    with closing(connection.cursor()) as cursor:
        where, where_params = _get_where(context, valid_users, meta_ids)
        cursor.execute(
            '''
            SELECT context_id, user_id, item_asked_id = COALESCE(item_answered_id, -1)
            FROM proso_models_answer
            WHERE ''' + ' AND '.join(where) + '''
            ORDER BY id
            ''', where_params)
        context_answers = defaultdict(lambda: defaultdict(list))
        for row in cursor:
            context_answers[row[0]][row[1]].append(row[2])
    user_answers = [
        # Trim to the curve length and right-pad shorter sequences with None.
        answers[:length] + [None] * (length - min(len(answers), length))
        for answers_by_user in context_answers.values()
        for answers in answers_by_user.values()
        if len(answers) >= user_length
    ]

    def _mean_with_confidence(xs):
        return confidence_value_to_json(binomial_confidence_mean([x for x in xs if x is not None]))

    return {
        'number_of_users': len(valid_users),
        # BUG FIX: key renamed from 'number_of_datapoints' so that the empty
        # and non-empty results expose the same schema ('number_of_data_points').
        'number_of_data_points': len(user_answers),
        'success': list(map(_mean_with_confidence, list(zip(*user_answers)))),
        'object_type': 'learning_curve',
    }