class Posts(DomainObject, Indexed):
    '''Newsfeed post that points to another entity (dataset, dashboard,
    research question or visualization).

    Besides its own fields, the post indexes the tag, keyword and research
    question ids of the related entity (``idx_*`` fields) so posts surface
    in interest-based searches.
    '''
    doctype = 'post'
    indexed = [
        mapped('id', 'entity_id'),
        'title',
        'description',
        'created_at',
        'created_by',
        'entity_type',
        'entity_ref',
        'comment_count',
        'like_count',
        unprefixed('idx_keywords'),
        unprefixed('idx_tags'),
        unprefixed('idx_research_questions'),
    ]

    @classmethod
    def get(cls, ref):
        '''Return the post with the given primary key, or None.'''
        return Session.query(cls).get(ref)

    @classmethod
    def before_index(cls, data):
        '''Enrich the index document with tag/keyword/research-question ids
        collected from the related entity.

        Loading failures are logged and swallowed so the post is still
        indexed (with empty ``idx_*`` lists) on error.
        '''
        if 'entity_type' not in data:
            return data
        related_data_ids = {
            'idx_tags': [],
            'idx_keywords': [],
            'idx_research_questions': [],
        }
        related_data_loaders = {
            'dataset': cls.load_related_data_dataset,
            'dashboard': cls.load_related_data_dashboard,
            'research_question': cls.load_related_data_research_question,
            'visualization': cls.load_related_data_visualization,
        }
        if data['entity_type'] not in related_data_loaders:
            log.error('Cannot load data for related entity of type %s.',
                      data['entity_type'])
            return data
        try:
            log.info('Indexing related entity %s with ref %s',
                     data['entity_type'], data['entity_ref'])
            related_data_loaders[data['entity_type']](
                data['entity_ref'],
                related_data_ids,
            )
        except Exception as e:
            log.error(
                'Failed to load related data '
                'for %s (with id=%s). Error: %s',
                data['entity_type'], data['entity_ref'], str(e))
            log.exception(e)
        data.update(related_data_ids)
        return data

    @classmethod
    def _call_action(cls, action, data):
        '''Call a CKAN action with auth ignored; return None on any error.'''
        try:
            return toolkit.get_action(action)({
                'ignore_auth': True,
            }, data)
        except Exception as e:
            log.error('Failed while calling action %s. Args: %s. '
                      'Error: %s', action, str(data), str(e))
            log.exception(e)
            return None

    @classmethod
    def _try_clean_list(cls, data, prop):
        '''Extract a list of values from ``data[prop]``.

        Accepts a missing value (returns []), a set, or a comma-separated
        string (items stripped, empties dropped); raises otherwise.
        '''
        value = data.get(prop)
        if value is None:
            return []
        if isinstance(value, set):
            return list(value)
        if isinstance(value, str) or isinstance(value, unicode):
            return list(
                filter(None, map(lambda v: v.strip(), value.split(','))))
        raise Exception('Cannot extract list from %s (type=%s)',
                        str(value), type(value))

    @classmethod
    def _load_tags_and_keywords(cls, tag_ids, related_data_ids):
        '''Resolve tag ids and accumulate tag and keyword ids (no dups).'''
        for tag in tag_ids:
            tag = cls._call_action('tag_show', {'id': tag})
            if not tag:
                continue
            if tag['id'] not in related_data_ids['idx_tags']:
                related_data_ids['idx_tags'].append(tag['id'])
            # FIX: membership was previously checked against the
            # related_data_ids dict itself (always true, since its keys are
            # 'idx_tags'/'idx_keywords'/...), which appended duplicate
            # keyword ids. Check the idx_keywords list instead, matching
            # load_related_data_dataset below.
            if tag.get('keyword_id') and \
                    tag['keyword_id'] not in related_data_ids['idx_keywords']:
                related_data_ids['idx_keywords'].append(tag['keyword_id'])

    @classmethod
    def load_related_data_dataset(cls, _id, related_data_ids):
        '''Collect tag and keyword ids from a dataset (package).'''
        dataset = cls._call_action('package_show', {'id': _id})
        if not dataset:
            return
        for tag in dataset.get('tags', []):
            if tag['id'] not in related_data_ids['idx_tags']:
                related_data_ids['idx_tags'].append(tag['id'])
            # Fetch the full tag to learn its keyword (if any).
            tag = cls._call_action('tag_show', {'id': tag['id']})
            if tag and tag.get('keyword_id') and \
                    tag['keyword_id'] not in related_data_ids['idx_keywords']:
                related_data_ids['idx_keywords'].append(tag['keyword_id'])

    @classmethod
    def load_related_data_research_question(cls, _id, related_data_ids):
        '''Collect the research question id plus its tags and keywords.'''
        if _id not in related_data_ids['idx_research_questions']:
            related_data_ids['idx_research_questions'].append(_id)
        rq = cls._call_action('research_question_show', {'id': _id})
        if not rq:
            return
        tags = cls._try_clean_list(rq, 'tags')
        cls._load_tags_and_keywords(tags, related_data_ids)

    @classmethod
    def load_related_data_visualization(cls, _id, related_data_ids):
        '''Collect research question/tag/keyword ids from a resource view.'''
        resource_view = cls._call_action('resource_view_show', {'id': _id})
        if not resource_view:
            return
        extras = resource_view.get('__extras', {})
        for rq_title in extras.get('research_questions', []):
            # Research questions are referenced by title here, so resolve
            # them through the search action and take the best match.
            rq = cls._call_action('search_research_questions', {
                'text': rq_title,
            })
            if rq and rq.get('count', 0) > 0:
                rq = rq['results'][0]
                cls.load_related_data_research_question(
                    rq['id'], related_data_ids)
        tags = cls._try_clean_list(extras, 'tags')
        cls._load_tags_and_keywords(tags, related_data_ids)

    @classmethod
    def load_related_data_dashboard(cls, _id, related_data_ids):
        '''Collect related ids from a dashboard: its own tags/keywords plus
        everything reachable through its indicators and datasets.
        '''
        dashboard = cls._call_action('dashboard_show', {'id': _id})
        if not dashboard:
            return
        tags = cls._try_clean_list(dashboard, 'tags')
        cls._load_tags_and_keywords(tags, related_data_ids)
        indicators = (dashboard.get('indicators', '') or '').strip()
        if indicators:
            indicators = json.loads(indicators)
            for indicator in indicators:
                if indicator.get('research_question'):
                    cls.load_related_data_research_question(
                        indicator['research_question'], related_data_ids)
                if indicator.get('resource_view_id'):
                    cls.load_related_data_visualization(
                        indicator['resource_view_id'],
                        related_data_ids,
                    )
        datasets = cls._try_clean_list(dashboard, 'datasets')
        for dataset in datasets:
            dataset = dataset.strip()
            if not dataset:
                continue
            cls.load_related_data_dataset(dataset, related_data_ids)
class Visualization(ResourceView, Indexed):
    # Indexed representation of a resource view. Only 'chart' and 'map'
    # views are indexed (see before_index).
    indexed = [
        mapped('id', 'entity_id'),
        'resource_id',
        'title',
        'description',
        'view_type',
        'research_questions',
        'package_id',
        'keywords',
        mapped('tags', 'tags'),
        mapped('organizations', 'organizations'),
        mapped('groups', 'groups'),
        unprefixed('idx_keywords'),
        unprefixed('idx_tags'),
        unprefixed('idx_research_questions'),
        unprefixed('permission_labels'),
    ]
    doctype = 'visualization'

    @staticmethod
    def before_index(data):
        '''Enrich the visualization document before indexing.

        Resolves the owning package to fill organization/group fields and
        permission labels, derives a description from the view's extras,
        and collects research question/tag/keyword indexes.

        Raises DontIndexException for view types other than chart or map.
        '''
        # Index only charts and maps; skip every other view type.
        if data.get('view_type') not in ['chart', 'map']:
            raise DontIndexException(data.get('id'))
        permission_labels = []
        resource_view = get_action('resource_view_show')({
            'ignore_auth': True
        }, {
            'id': data['id']
        })
        data['package_id'] = resource_view['package_id']
        package = get_action('package_show')({
            'ignore_auth': True
        }, {
            'id': data['package_id'],
            'include_tracking': True
        })
        if package:
            # Visibility follows the owning package: members of its
            # organization and of each of its groups may see it.
            data['organizations'] = (package.get('organization', {})
                                     or {}).get('name')
            organization_id = (package.get('organization', {})
                               or {}).get('id')
            if organization_id:
                permission_labels.append('member-%s' % organization_id)
            data['groups'] = []
            for g in package.get('groups', []):
                data['groups'].append(g['name'])
                permission_labels.append('member-%s' % g['id'])
        # Drop the SQLAlchemy bookkeeping attribute; it is not serializable.
        if data.get('_sa_instance_state'):
            del data['_sa_instance_state']
        if data.get('description') is not None:
            # NOTE(review): returning here skips the research question,
            # tag/keyword and permission-label enrichment below whenever a
            # description is already present — confirm this early exit is
            # intentional.
            return data

        def _get_description(data_dict):
            # Pick the description field matching the view type from
            # __extras; falls through (returns None) when nothing matches.
            if not data_dict.get('__extras'):
                return None
            extras = data_dict['__extras']
            if data['view_type'] == 'chart':
                return extras.get('chart_description', '')
            elif data['view_type'] == 'table':
                return extras.get('table_description', '')
            elif data['view_type'] == 'map':
                return extras.get('map_description', '')
            else:
                # guess the description
                for prop, value in extras.items():
                    if prop == 'description' or prop.endswith('_description'):
                        return value

        if not data.get('__extras'):
            if resource_view.get('description') is not None:
                data['description'] = resource_view['description']
            else:
                data['description'] = _get_description(resource_view)
        else:
            data['description'] = _get_description(data)

        # get research questions
        rq_ids = set()
        if data.get('config'):
            conf = data.get('config')
            if conf.get('__extras'):
                ext = conf.get('__extras')
                if ext.get('research_questions'):
                    data_rq = json.dumps(ext.get('research_questions'))
                    data['research_questions'] = data_rq
                    # NOTE(review): json.dumps always returns a string, so
                    # the dict branch below looks unreachable — verify.
                    if isinstance(data_rq, dict):
                        rq_ids.add(data_rq.get('id'))
                    elif isinstance(data_rq, str):
                        rq_ids.add(data_rq)
        else:
            if data.get('__extras'):
                ext = data.get('__extras')
                if ext.get('research_questions'):
                    data_rq = json.dumps(ext.get('research_questions'))
                    data['research_questions'] = data_rq
                    if isinstance(data_rq, dict):
                        rq_ids.add(data_rq.get('id'))
                    elif isinstance(data_rq, str):
                        rq_ids.add(data_rq)
        if rq_ids:
            # The collected ids are JSON fragments; strip quotes/brackets
            # to recover a flat comma-separated list of titles or ids.
            rq_ids_list = list(rq_ids)
            string_ids = ','.join(rq_ids_list)
            clean = string_ids.replace('"', "").replace(']',
                                                        "").replace("[", "")
            clean_list = clean.split(',')
            data['idx_research_questions'] = []
            for rq_title in clean_list:
                try:
                    rq_title = rq_title.strip(" ")
                    rq = ResearchQuestion.get_by_id_name_or_title(rq_title)
                    if rq:
                        data['idx_research_questions'].append(rq.id)
                except Exception as e:
                    log.warning(
                        'Failed to fetch research question %s. '
                        'Error: %s', rq_title, str(e))
                    log.exception(e)
        keywords = set()
        if data.get('tags'):
            # Tags come in as a comma-separated string; index them and
            # resolve each tag's keyword (if any).
            data['tags'] = data.get('tags').split(',')
            data['idx_tags'] = data['tags']
            for tag in data['tags']:
                tag_obj = get_action('tag_show')({
                    'ignore_auth': True
                }, {
                    'id': tag
                })
                if tag_obj.get('keyword_id'):
                    keyword_obj = get_action('keyword_show')(
                        {
                            'ignore_auth': True
                        }, {
                            'id': tag_obj.get('keyword_id')
                        })
                    keywords.add(keyword_obj.get('name'))
        if keywords:
            data['keywords'] = ','.join(keywords)
            data['idx_keywords'] = list(keywords)
        if permission_labels:
            data['permission_labels'] = permission_labels
        return data
class Dashboard(DomainObject, Indexed):
    '''Dashboard domain object with search indexing.

    before_index resolves every related entity (datasets, research
    questions, visualizations, tags, keywords, organizations, groups) and
    generates the permission labels controlling who may see the dashboard.
    '''
    indexed = [
        mapped('id', 'entity_id'),
        'name',
        'title',
        'description',
        'type',
        'source',
        'indicators',
        'research_questions',
        'datasets',
        'keywords',
        mapped('tags', 'tags'),
        mapped('groups', 'groups'),
        mapped('organizations', 'organizations'),
        mapped('created_at', 'khe_created'),
        mapped('modified_at', 'khe_modified'),
        unprefixed('idx_keywords'),
        unprefixed('idx_tags'),
        unprefixed('idx_research_questions'),
        unprefixed('idx_shared_with_users'),
        # NOTE: 'organiztions' typo is preserved — it is an index field
        # name that other components may rely on.
        unprefixed('idx_shared_with_organiztions'),
        unprefixed('idx_shared_with_groups'),
        unprefixed('idx_datasets'),
        unprefixed('permission_labels'),
        unprefixed('idx_groups'),
        unprefixed('idx_organizations'),
    ]
    doctype = 'dashboard'

    @classmethod
    def before_index(cls, data):
        '''Resolve all entities related to this dashboard and flatten them
        into the index document (comma-joined display fields plus ``idx_*``
        id lists and permission labels).
        '''
        indicators = []
        if data.get('indicators'):
            indicators = json.loads(data['indicators'])
        datasets = {}
        organizations = {}
        pdorganizations = {}  # per-dataset orgs: pkg id -> {org id -> org}
        groups = {}
        pdgroups = {}  # per-dataset groups: pkg id -> {group id -> group}
        research_questions = {}
        visualizations = {}
        tags = {}
        keywords = {}
        research_question_ids = set()
        resource_view_ids = set()

        if data.get('type') == 'internal':
            for k in indicators:
                if k.get('research_question'):
                    research_question_ids.add(k['research_question'])
                if k.get('resource_view_id'):
                    resource_view_ids.add(k['resource_view_id'])
        else:
            if isinstance(indicators, unicode) or isinstance(indicators, str):
                # The indicator seems to be the research question ID
                research_question_ids.add(indicators)
            elif isinstance(indicators, list):
                for i in indicators:
                    research_question_ids.add(i['research_question'])
            else:
                log.warning(
                    'The indicators was expected to be string/unicode '
                    'or list, however %s was received. Indicators: %s',
                    str(type(indicators)), str(indicators))

        # Load packages (when external dashboard)
        if data.get('datasets'):
            for dataset_id in map(
                    lambda _id: _id.strip(),
                    filter(lambda _id: _id and _id.strip(),
                           data.get('datasets', '').split(','))):
                pkg = cls._get_package(dataset_id)
                if pkg:
                    datasets[pkg['id']] = pkg

        # Load the research questions related to this dashboard.
        for research_question_id in research_question_ids:
            try:
                rq = get_action('research_question_show')(
                    {
                        'ignore_auth': True,
                    }, {
                        'id': research_question_id,
                    })
                research_questions[rq['id']] = rq
            except Exception as e:
                log.warning('Failed to fetch research question %s. '
                            'Error: %s', research_question_id, str(e))
                log.exception(e)

        # Load resource views (visualizations). Also loads the packages.
        for resource_view_id in resource_view_ids:
            try:
                rv = get_action('resource_view_show')({
                    'ignore_auth': True,
                }, {
                    'id': resource_view_id,
                })
                visualizations[rv['id']] = rv
                if rv['package_id'] not in datasets:
                    pkg = cls._get_package(rv['package_id'])
                    if pkg:
                        datasets[pkg['id']] = pkg
            except Exception as e:
                log.warning('Failed to fetch resource view %s. Error: %s',
                            resource_view_id, str(e))
                log.exception(e)

        # Load tags and keywords
        if data.get('tags'):
            for tag_id in map(
                    lambda t: t.strip(),
                    filter(lambda t: t and t.strip(),
                           data.get('tags', '').split(','))):
                try:
                    tag = get_action('tag_show')({
                        'ignore_auth': True,
                    }, {
                        'id': tag_id,
                    })
                    tags[tag['id']] = tag
                    keyword_id = tag.get('keyword_id')
                    if keyword_id and keyword_id not in keywords:
                        try:
                            keyword = get_action('keyword_show')(
                                {
                                    'ignore_auth': True,
                                }, {
                                    'id': keyword_id,
                                })
                            keywords[keyword['id']] = keyword
                        except Exception as e:
                            log.warning(
                                'Failed to fetch keyword %s. '
                                'Error: %s', keyword_id, str(e))
                            log.exception(e)
                except Exception as e:
                    log.warning('Failed to fetch tag %s. '
                                'Error: %s', tag_id, str(e))
                    log.exception(e)

        # set groups and organizations
        for _, pkg in datasets.items():
            pdorganizations[pkg['id']] = {}
            pdgroups[pkg['id']] = {}
            if pkg.get('organization'):
                org = pkg['organization']
                organizations[org['id']] = org
                pdorganizations[pkg['id']][org['id']] = org
            if pkg.get('groups'):
                for group in pkg['groups']:
                    groups[group['id']] = group
                    # FIX: this used to record the package *organization*
                    # under the per-dataset groups map (and raised
                    # NameError for packages with groups but no
                    # organization); record the group itself.
                    pdgroups[pkg['id']][group['id']] = group
            # Handle if dataset is explicitly shared with an organization
            # or group: the dataset may be treated as it belongs to that
            # group.
            exp_shared_orgs = get_as_list('shared_with_organizations', pkg)
            exp_shared_groups = get_as_list('shared_with_groups', pkg)
            all_groups = {}
            all_groups.update(organizations)
            all_groups.update(groups)
            for exp_groups, is_org in [(exp_shared_orgs, True),
                                       (exp_shared_groups, False)]:
                for group_id in exp_groups:
                    if group_id not in all_groups:
                        group = cls._get_group(group_id, is_org)
                        if group:
                            if is_org:
                                organizations[group['id']] = group
                                pdorganizations[pkg['id']][group['id']] = \
                                    group
                            else:
                                groups[group['id']] = group
                                pdgroups[pkg['id']][group['id']] = group
                    else:
                        group = all_groups[group_id]
                        if is_org:
                            pdorganizations[pkg['id']][group['id']] = group
                        else:
                            pdgroups[pkg['id']][group['id']] = group

        # Set data
        data['datasets'] = ','.join(datasets.keys())
        data['idx_datasets'] = list(datasets.keys())
        data['research_questions'] = ','.join(
            rq['title'] for rq in research_questions.values())
        data['organizations'] = ','.join(
            o['name'] for o in organizations.values())
        data['groups'] = ','.join(g['name'] for g in groups.values())
        data['tags'] = ','.join(t['name'] for t in tags.values())
        data['keywords'] = ','.join(k['name'] for k in keywords.values())

        # Set idx_ (id index) for usage in user interests
        data['idx_research_questions'] = list(research_questions.keys())
        data['idx_tags'] = [t['name'] for t in tags.values()]
        data['idx_keywords'] = [k['name'] for k in keywords.values()]

        permission_labels = cls._generate_dashboard_permission_labels(
            data,
            datasets,
            pdorganizations,
            pdgroups,
        )
        if permission_labels:
            data['permission_labels'] = permission_labels
        data['idx_organizations'] = list(organizations.keys())
        data['idx_groups'] = list(groups.keys())
        return data

    @classmethod
    def _generate_dashboard_permission_labels(cls, data, datasets,
                                              organizations, groups):
        '''Build the search permission labels for a dashboard.

        :param data: the dashboard index document (mutated: shared_with
            fields are normalized).
        :param datasets: pkg id -> package dict of all related datasets.
        :param organizations: pkg id -> {org id -> org} per-dataset map.
        :param groups: pkg id -> {group id -> group} per-dataset map.
        '''
        permission_labels = []
        users = {}
        # The creator always has access.
        if data.get('created_by'):
            permission_labels.append('creator-%s' % data['created_by'])
        if data.get('shared_with_users'):
            shared_with = cls._get_safe_shared_with(data['shared_with_users'])
            for user_id in map(
                    lambda _id: _id.strip(),
                    filter(lambda _id: _id and _id.strip(),
                           shared_with.split(','))):
                user = cls._get_user(user_id)
                if user:
                    users[user['id']] = user
            data['shared_with_users'] = ','.join(users.keys())
            data['idx_shared_with_users'] = list(users.keys())

        # Attach permission labels for explicit sharing (with user,
        # organization or group)
        permission_labels += get_permission_labels(data)

        # We generate a permission label that defines the access (implicit)
        # to this dashboard. One dashboard can be accessed by a user that
        # can also access ALL datasets that provide data to this dashboard.
        # This is implicit access.
        # One dashboard can use data from multiple datasets, each of those
        # can be accessed by the users in multiple organizations or groups.
        # For example a dashboard can have data from datasets D1 and D2,
        # each of which can be accessed by the users of:
        #   D1(orgA, orgB, groupC) and D2(orgD, groupE)
        # So a user can view this dashboard if he belongs to any of: orgA,
        # orgB or groupC AND at the same time belongs to orgD or groupE.
        # To encode this, we generate a permission label like so:
        #   match-groups-orgA|orgB|groupC+orgD|groupE
        # once the prefix 'match-groups-' is removed, we are left with the
        # sets of groups separated with '+'. The user must belong to all of
        # these sets. To belong to a set it means to be part of ANY of the
        # group or organization in that set. The group/org ids are
        # separated with '|' in the set.
        any_groups = []
        for pkgid, _ in datasets.items():
            # list() so the concatenation below works regardless of the
            # dict .keys() return type.
            pgroups = list(groups.get(pkgid, {}).keys())
            porgs = list(organizations.get(pkgid, {}).keys())
            if pgroups + porgs:
                any_groups.append(pgroups + porgs)
        if any_groups:
            combined_label = '+'.join(['|'.join(_ids) for _ids in any_groups])
            permission_labels.append('match-groups-{}'.format(combined_label))

        # Check each dataset, if explicitly shared with users.
        # If there are users that have access to all datasets, then we must
        # add the same permission label for those users (user-<id>) to the
        # dashboard as well, to grant those users access to this dashboard.
        shared_users = []
        for _, dataset in datasets.items():
            user_with_access = set()
            if dataset.get('shared_with_users'):
                shared_with = cls._get_safe_shared_with(
                    dataset['shared_with_users'])
                for user_id in map(
                        lambda u: u.strip(),
                        filter(lambda u: u and u.strip(),
                               shared_with.split(','))):
                    # We might get the ID or the name of the user
                    user = cls._get_user(user_id)
                    if user:
                        user_with_access.add(user['id'])
            # Append one (possibly empty) set per dataset so the
            # intersection below only grants access to users shared on
            # ALL datasets.
            shared_users.append(user_with_access)
        if shared_users:
            if len(datasets) == 1:
                for user_id in shared_users[0]:
                    permission_labels.append('user-%s' % user_id)
            else:
                user_with_access = shared_users[0]
                for i in range(1, len(shared_users)):
                    user_with_access = \
                        user_with_access.intersection(shared_users[i])
                for user_id in user_with_access:
                    permission_labels.append('user-%s' % user_id)
        return permission_labels

    @classmethod
    def _get_safe_shared_with(cls, value):
        '''This function returns a comma separated string of the values in
        properties like shared_with_users.

        Because earlier version kept these properties differently - some
        were serializing a set (string value {"a","b","c"}), and some are
        keeping json versions of the values, this function will check for
        those cases and will return a comma separated string of the values
        always.
        '''
        if isinstance(value, list):
            return ','.join(value)
        if value.startswith('{') and value.endswith('}'):
            return value[1:-1]
        if value.startswith('"') and value.endswith('"'):
            return ','.join(json.loads('[%s]' % value))
        return value

    @classmethod
    def _get_user(cls, user_id):
        '''Fetch a user by id or name; return None on error.'''
        try:
            return get_action('user_show')({
                'ignore_auth': True,
            }, {
                'id': user_id,
            })
        except Exception as e:
            log.warning('Failed to fetch user %s. Error: %s',
                        user_id, str(e))
            log.exception(e)
            return None

    @classmethod
    def _get_package(cls, pkg_id):
        '''Fetch a package by id; return None on error.'''
        try:
            pkg = get_action('package_show')({
                'ignore_auth': True,
            }, {
                'id': pkg_id,
            })
            return pkg
        except Exception as e:
            # FIX: this handler referenced undefined 'dataset_id' and
            # raised NameError instead of logging; use the parameter.
            log.warning('Failed to get package %s. Error: %s',
                        pkg_id, str(e))
            log.exception(e)
            return None

    @classmethod
    def _get_group(cls, group_id, is_org):
        '''Fetch an organization (is_org=True) or group; None on error.'''
        action = 'organization_show' if is_org else 'group_show'
        try:
            return get_action(action)({
                'ignore_auth': True,
            }, {
                'id': group_id
            })
        except Exception as e:
            log.warning('Failed to execute %s for id %s. Error: %s',
                        action, group_id, str(e))
            log.exception(e)
            return None

    @classmethod
    def get(cls, reference):
        '''Returns a dashboard object referenced by its id or name.'''
        if not reference:
            return None
        dashboard = Session.query(cls).get(reference)
        if dashboard is None:
            dashboard = cls.by_name(reference)
        return dashboard

    @classmethod
    def delete(cls, filter):
        '''Delete the first dashboard matching the filter; raise NotFound
        if there is none.
        '''
        obj = Session.query(cls).filter_by(**filter).first()
        if obj:
            Session.delete(obj)
            Session.commit()
        else:
            raise logic.NotFound(_(u'Dashboard'))

    @classmethod
    def search(cls, **kwargs):
        '''Query dashboards.

        Supports ``q`` (substring match on name/title/description),
        ``limit``, ``offset`` and ``order_by`` ("<column> <asc|desc>");
        any remaining kwargs are exact-match filters.
        '''
        limit = kwargs.get('limit')
        offset = kwargs.get('offset')
        order_by = kwargs.get('order_by')
        q = kwargs.get('q')
        kwargs.pop('limit', None)
        kwargs.pop('offset', None)
        kwargs.pop('order_by', None)
        kwargs.pop('q', None)
        if q:
            query = Session.query(cls) \
                .filter(or_(cls.name.contains(q),
                            cls.title.ilike('%' + q + '%'),
                            cls.description.ilike('%' + q + '%')))
        else:
            query = Session.query(cls) \
                .filter_by(**kwargs)
        if order_by:
            # NOTE(review): assumes order_by is "<column> <direction>";
            # a bare column name would raise IndexError — confirm callers.
            column = order_by.split(' ')[0]
            order = order_by.split(' ')[1]
            query = query.order_by("%s %s" % (column, order))
        if limit:
            query = query.limit(limit)
        if offset:
            query = query.offset(offset)
        return query
class _TestModel(Indexed):
    # Minimal Indexed subclass used in tests: one plain (prefixed) field,
    # one unprefixed field, and one mapped (renamed) field.
    indexed = [
        'field_a',
        unprefixed('field_c'),
        mapped('field_d', 'pref_field_d'),
    ]
def test_unprefixed(self):
    # unprefixed() must map the property onto itself, with no index prefix.
    expected = {
        'field': 'prop',
        'as': 'prop',
    }
    actual = unprefixed('prop')
    assert_equals(expected, actual)
class ResearchQuestion(DomainObject, Indexed):
    '''Research question domain object with search indexing.'''
    doctype = 'research_question'
    indexed = [
        mapped('id', 'entity_id'),
        'name',
        'title',
        'author',
        'theme_id',
        'theme_name',
        'theme_title',
        'sub_theme_id',
        'sub_theme_name',
        'sub_theme_title',
        'image_url',
        'tags',
        'keywords',
        mapped('tags', 'tags'),
        mapped('created_at', 'khe_created'),
        mapped('modified_at', 'khe_modified'),
        unprefixed('idx_keywords'),
        unprefixed('idx_tags'),
        unprefixed('idx_research_questions'),
    ]

    @classmethod
    def get_by_id_name_or_title(cls, id_name_title):
        '''Return the first research question whose id, name or title
        equals the given value, or None.
        '''
        query = Session.query(cls).filter(
            or_(
                research_question.c.id == id_name_title,
                research_question.c.name == id_name_title,
                research_question.c.title == id_name_title,
            ))
        return query.first()

    @classmethod
    def get(cls, id_or_name=None, **kwargs):
        '''Query research questions.

        Supports ``q`` (substring match on title), ``limit``, ``offset``
        and ``order_by``; remaining kwargs are exact-match filters.
        Returns the (unevaluated) query object.
        '''
        q = kwargs.pop('q', None)
        limit = kwargs.pop('limit', None)
        offset = kwargs.pop('offset', None)
        order_by = kwargs.pop('order_by', None)
        query = Session.query(cls).autoflush(False)
        query = query.filter_by(**kwargs)
        if id_or_name:
            query = query.filter(
                or_(cls.id == id_or_name, cls.name == id_or_name))
        if q:
            query = query.filter(cls.title.ilike(r"%{}%".format(q)))
        if order_by:
            query = query.order_by(order_by)
        if limit:
            query = query.limit(limit)
        if offset:
            query = query.offset(offset)
        return query

    @classmethod
    def get_by_id(cls, ref_id):
        '''Return the research question with the given primary key.'''
        return Session.query(cls).get(ref_id)

    @classmethod
    def update(cls, filter, data):
        '''Update all rows matching filter with data; return the first.'''
        obj = Session.query(cls).filter_by(**filter)
        obj.update(data)
        Session.commit()
        return obj.first()

    @classmethod
    def all(cls, theme=None, sub_theme=None, state=('active', )):
        # TODO Handle filtering by sub/theme properly
        q = Session.query(cls)
        if state:
            q = q.filter(cls.state.in_(state))
        return q.order_by(cls.title)

    @classmethod
    def delete(cls, id):
        '''Delete the research question with the given id or raise
        ObjectNotFound.
        '''
        kwds = {'id': id}
        obj = Session.query(cls).\
            filter_by(**kwds).first()
        if not obj:
            raise toolkit.ObjectNotFound
        Session.delete(obj)
        Session.commit()

    @staticmethod
    def before_index(data):
        '''Enrich the index document with theme/sub-theme fields and the
        tag/keyword indexes.
        '''
        if data.get('theme'):
            # NOTE(review): assumes Theme.get never returns None here —
            # an unknown theme id would raise AttributeError; confirm.
            theme = Theme.get(data['theme'])
            data['theme_id'] = theme.id
            data['theme_name'] = theme.name
            data['theme_title'] = theme.title
        if data.get('sub_theme'):
            # NOTE(review): same assumption for SubThemes.get(...).first().
            sub_theme = SubThemes.get(data['sub_theme']).first()
            data['sub_theme_id'] = sub_theme.id
            data['sub_theme_name'] = sub_theme.name
            data['sub_theme_title'] = sub_theme.title
        keywords = set()
        if data.get('tags'):
            data['tags'] = data.get('tags').split(',')
            data['idx_tags'] = data['tags']
            for tag in data['tags']:
                try:
                    tag_obj = get_action('tag_show')({
                        'ignore_auth': True
                    }, {
                        'id': tag
                    })
                    if tag_obj.get('keyword_id'):
                        keyword_obj = get_action('keyword_show')(
                            {
                                'ignore_auth': True
                            }, {
                                'id': tag_obj.get('keyword_id')
                            })
                        keywords.add(keyword_obj.get('name'))
                    # (redundant per-iteration data['keywords'] assignment
                    # removed; the block after the loop sets it once.)
                except Exception as e:
                    log.warning('Failed to fetch tag/keyword data. '
                                'Error: %s', str(e))
        if keywords:
            data['keywords'] = ','.join(keywords)
            data['idx_keywords'] = list(keywords)
        data['idx_research_questions'] = [data['id']]
        return data

    def __repr__(self):
        return '<ResearchQuestion %s>' % self.title
class _IndexedPackage(Indexed):
    # Minimal indexed model for CKAN packages: only the id is indexed,
    # without any field prefix.
    doctype = 'package'
    indexed = [unprefixed('id')]