def organisation_dataset_scores(organisation_name,
                                include_sub_organisations=False):
    '''
    Returns a dictionary detailing openness scores for the organisation
    for each dataset.

    i.e.:
    {'publisher_name': 'cabinet-office',
     'publisher_title': 'Cabinet Office',
     'data': [
       {'dataset_title', 'dataset_name', 'publisher_title',
        'publisher_name', 'resource_position', 'resource_id',
        'resource_url', 'openness_score', 'openness_score_reason',
        'last_updated',
        plus every key found in the JSON stored in task_status.error
        (e.g. 'reason')}
       ...]}

    Each entry in 'data' describes one dataset, using the details of its
    highest scoring resource. Entries keep the order of the SQL query
    (package title, package name, resource position).

    NB the list does not contain datasets that have 0 resources and
    therefore score 0

    :param organisation_name: name of the organisation (group) to report on
    :param include_sub_organisations: if True, also include datasets of
        the organisations yielded by go_down_tree(org)
    :returns: dict with keys 'publisher_name', 'publisher_title', 'data'

    Calls abort(404) if no organisation has the given name.
    '''
    sql = """
        select package.id as package_id,
               task_status.key as task_status_key,
               task_status.value as task_status_value,
               task_status.error as task_status_error,
               task_status.last_updated as task_status_last_updated,
               resource.id as resource_id,
               resource.url as resource_url,
               resource.position,
               package.title as package_title,
               package.name as package_name,
               "group".id as publisher_id,
               "group".name as publisher_name,
               "group".title as publisher_title
        from resource
            left join task_status on task_status.entity_id = resource.id
            left join resource_group on resource.resource_group_id = resource_group.id
            left join package on resource_group.package_id = package.id
            left join member on member.table_id = package.id
            left join "group" on member.group_id = "group".id
        where
            entity_id in (select entity_id from task_status where task_status.task_type='qa')
            and package.state = 'active'
            and resource.state='active'
            and resource_group.state='active'
            and "group".state='active'
            and task_status.task_type='qa'
            and task_status.key='status'
            %(org_filter)s
        order by package.title, package.name, resource.position
        """

    org = model.Group.by_name(organisation_name)
    if not org:
        abort(404, 'Publisher not found')
    organisation_title = org.title

    # Organisation names are always passed as bind parameters, never
    # interpolated into the SQL text.
    values = {}
    if not include_sub_organisations:
        org_filter = 'and "group".name = :org_name'
        values['org_name'] = organisation_name
    else:
        # NOTE(review): go_down_tree presumably yields the organisation
        # itself followed by its descendants - confirm against its
        # definition.
        sub_org_names = [sub_org.name for sub_org in go_down_tree(org)]
        if not sub_org_names:
            # Avoid generating the invalid SQL fragment "and ()".
            sub_org_names = [organisation_name]
        conditions = []
        for index, sub_org_name in enumerate(sub_org_names):
            param = 'org_name_%d' % index
            values[param] = sub_org_name
            conditions.append('"group".name = :%s' % param)
        org_filter = 'and (%s)' % ' or '.join(conditions)

    rows = model.Session.execute(sql % {'org_filter': org_filter}, values)

    # dataset_name: {properties}, kept in the order the query returned them
    data = OrderedDict()
    for row in rows:
        package_data = data.get(row.package_name)
        if package_data is None:
            package_data = OrderedDict((
                ('dataset_title', row.package_title),
                ('dataset_name', row.package_name),
                ('publisher_title', row.publisher_title),
                ('publisher_name', row.publisher_name),
                # the rest are placeholders to hold the details
                # of the highest scoring resource
                ('resource_position', None),
                ('resource_id', None),
                ('resource_url', None),
                ('openness_score', None),
                ('openness_score_reason', None),
                ('last_updated', None),
            ))
            data[row.package_name] = package_data

        score = row.task_status_value
        best_score = package_data['openness_score']
        # Explicit None handling reproduces the Python 2 ordering (None
        # sorts below everything) without relying on it. Scores are
        # compared exactly as stored in task_status.value.
        if score is None or (best_score is not None and
                             not score > best_score):
            continue

        package_data['resource_position'] = row.position
        package_data['resource_id'] = row.resource_id
        package_data['resource_url'] = row.resource_url
        try:
            package_data.update(json.loads(row.task_status_error))
        except (ValueError, TypeError) as e:
            # ValueError: the text is not valid JSON.
            # TypeError: the column is NULL or the JSON is not an object.
            log.error('QA status "error" should have been in JSON format, '
                      'but found: "%s" %s', row.task_status_error, e)
            package_data['reason'] = \
                'Could not display reason due to a system error'
        package_data['openness_score'] = score
        package_data['openness_score_reason'] = \
            package_data['reason']  # deprecated
        package_data['last_updated'] = row.task_status_last_updated

    return {'publisher_name': organisation_name,
            'publisher_title': organisation_title,
            'data': list(data.values())}
def openness_index(include_sub_organizations=False):
    '''Returns the counts of 5 stars of openness for all organizations.

    :param include_sub_organizations: if True, each organization's score
        counts also include those of the organizations returned by its
        get_children_group_hierarchy(type='organization')
    :returns: dict with keys:
        'table' - one OrderedDict per organization, sorted by organization
            name, holding 'organization_title', 'organization_name',
            'total_stars', 'average_stars' and the entries produced by
            jsonify_counter() for that organization's score counts
        'total_score_counts' - jsonify_counter() of the score counts
            summed over all organizations (sub-organizations are never
            added twice here)
        'num_packages_scored' - sum of all the values in those counts
        'num_packages' - number of active packages

    Packages for which 'qa_package_openness_show' raises ObjectNotFound
    are logged and left out of the counts.
    '''
    context = {'model': model, 'session': model.Session, 'ignore_auth': True}
    openness_show = p.toolkit.get_action('qa_package_openness_show')
    total_score_counts = Counter()
    counts = {}

    # Get all the scores and build up the results by org
    organizations = model.Session.query(model.Group)\
        .filter(model.Group.type == 'organization')\
        .filter(model.Group.state == 'active').all()
    for org in organizations:
        scores = []
        for pkg in org.packages():
            try:
                qa = openness_show(context, {'id': pkg.id})
            except p.toolkit.ObjectNotFound:
                log.warning('No QA info for package %s', pkg.name)
                # Skip just this package; one package without QA info
                # must not abort the whole report.
                continue
            scores.append(qa['openness_score'])
        score_counts = Counter(scores)
        total_score_counts += score_counts
        counts[org.name] = {
            'organization_title': org.title,
            'score_counts': score_counts,
        }

    if include_sub_organizations:
        # Work on a copy so each organization gains its descendants' own
        # counts (read from the untouched `counts`) exactly once.
        results = copy.deepcopy(counts)
        for org_name in results:
            org = model.Group.by_name(org_name)
            for sub_org_id, sub_org_name, sub_org_title, sub_org_parent_id \
                    in org.get_children_group_hierarchy(type='organization'):
                if sub_org_name not in counts:
                    # occurs only if there is an organization created
                    # since the last loop?
                    continue
                results[org_name]['score_counts'] += \
                    counts[sub_org_name]['score_counts']
    else:
        results = counts

    table = []
    for org_name in sorted(results):
        org_counts = results[org_name]
        score_counts = org_counts['score_counts']
        # A score of None or 0 contributes no stars.
        total_stars = sum(k * v for k, v in score_counts.items() if k)
        # A score of 0 counts as scored; a score of None does not.
        num_pkgs_scored = sum(v for k, v in score_counts.items()
                              if k is not None)
        average_stars = round(float(total_stars) / num_pkgs_scored, 1) \
            if num_pkgs_scored else 0.0
        row = OrderedDict((
            ('organization_title', org_counts['organization_title']),
            ('organization_name', org_name),
            ('total_stars', total_stars),
            ('average_stars', average_stars),
        ))
        row.update(jsonify_counter(score_counts))
        table.append(row)

    # Get total number of active packages
    num_packages = model.Session.query(model.Package)\
        .filter_by(state='active')\
        .count()

    return {
        'table': table,
        'total_score_counts': jsonify_counter(total_score_counts),
        'num_packages_scored': sum(total_score_counts.values()),
        'num_packages': num_packages,
    }
def organisation_dataset_scores(organisation_name,
                                include_sub_organisations=False):
    '''
    Returns a dictionary detailing openness scores for the organisation
    for each dataset.

    i.e.:
    {'publisher_name': 'cabinet-office',
     'publisher_title': 'Cabinet Office',
     'data': [
       {'dataset_title', 'dataset_name', 'publisher_title',
        'publisher_name', 'resource_position', 'resource_id',
        'resource_url', 'openness_score', 'openness_score_reason',
        'last_updated',
        plus every key found in the JSON stored in task_status.error
        (e.g. 'reason')}
       ...]}

    Each entry in 'data' describes one dataset, using the details of its
    highest scoring resource. Entries keep the order of the SQL query
    (package title, package name, resource position).

    NB the list does not contain datasets that have 0 resources and
    therefore score 0

    :param organisation_name: name of the organisation (group) to report on
    :param include_sub_organisations: if True, also include datasets of
        the organisations yielded by go_down_tree(org)
    :returns: dict with keys 'publisher_name', 'publisher_title', 'data'

    Calls abort(404) if no organisation has the given name.
    '''
    sql = """
        select package.id as package_id,
               task_status.key as task_status_key,
               task_status.value as task_status_value,
               task_status.error as task_status_error,
               task_status.last_updated as task_status_last_updated,
               resource.id as resource_id,
               resource.url as resource_url,
               resource.position,
               package.title as package_title,
               package.name as package_name,
               "group".id as publisher_id,
               "group".name as publisher_name,
               "group".title as publisher_title
        from resource
            left join task_status on task_status.entity_id = resource.id
            left join resource_group on resource.resource_group_id = resource_group.id
            left join package on resource_group.package_id = package.id
            left join member on member.table_id = package.id
            left join "group" on member.group_id = "group".id
        where
            entity_id in (select entity_id from task_status where task_status.task_type='qa')
            and package.state = 'active'
            and resource.state='active'
            and resource_group.state='active'
            and "group".state='active'
            and task_status.task_type='qa'
            and task_status.key='status'
            %(org_filter)s
        order by package.title, package.name, resource.position
        """

    org = model.Group.by_name(organisation_name)
    if not org:
        abort(404, 'Publisher not found')
    organisation_title = org.title

    # Organisation names are always passed as bind parameters, never
    # interpolated into the SQL text.
    values = {}
    if not include_sub_organisations:
        org_filter = 'and "group".name = :org_name'
        values['org_name'] = organisation_name
    else:
        # NOTE(review): go_down_tree presumably yields the organisation
        # itself followed by its descendants - confirm against its
        # definition.
        sub_org_names = [sub_org.name for sub_org in go_down_tree(org)]
        if not sub_org_names:
            # Avoid generating the invalid SQL fragment "and ()".
            sub_org_names = [organisation_name]
        conditions = []
        for index, sub_org_name in enumerate(sub_org_names):
            param = 'org_name_%d' % index
            values[param] = sub_org_name
            conditions.append('"group".name = :%s' % param)
        org_filter = 'and (%s)' % ' or '.join(conditions)

    rows = model.Session.execute(sql % {'org_filter': org_filter}, values)

    # dataset_name: {properties}, kept in the order the query returned them
    data = OrderedDict()
    for row in rows:
        package_data = data.get(row.package_name)
        if package_data is None:
            package_data = OrderedDict((
                ('dataset_title', row.package_title),
                ('dataset_name', row.package_name),
                ('publisher_title', row.publisher_title),
                ('publisher_name', row.publisher_name),
                # the rest are placeholders to hold the details
                # of the highest scoring resource
                ('resource_position', None),
                ('resource_id', None),
                ('resource_url', None),
                ('openness_score', None),
                ('openness_score_reason', None),
                ('last_updated', None),
            ))
            data[row.package_name] = package_data

        score = row.task_status_value
        best_score = package_data['openness_score']
        # Explicit None handling reproduces the Python 2 ordering (None
        # sorts below everything) without relying on it. Scores are
        # compared exactly as stored in task_status.value.
        if score is None or (best_score is not None and
                             not score > best_score):
            continue

        package_data['resource_position'] = row.position
        package_data['resource_id'] = row.resource_id
        package_data['resource_url'] = row.resource_url
        try:
            package_data.update(json.loads(row.task_status_error))
        except (ValueError, TypeError) as e:
            # ValueError: the text is not valid JSON.
            # TypeError: the column is NULL or the JSON is not an object.
            log.error('QA status "error" should have been in JSON format, '
                      'but found: "%s" %s', row.task_status_error, e)
            package_data['reason'] = \
                'Could not display reason due to a system error'
        package_data['openness_score'] = score
        package_data['openness_score_reason'] = \
            package_data['reason']  # deprecated
        package_data['last_updated'] = row.task_status_last_updated

    return {'publisher_name': organisation_name,
            'publisher_title': organisation_title,
            'data': list(data.values())}
def openness_index(include_sub_organizations=False):
    '''Returns the counts of 5 stars of openness for all organizations.

    :param include_sub_organizations: if True, each organization's score
        counts also include those of the organizations returned by its
        get_children_group_hierarchy(type='organization')
    :returns: dict with keys:
        'table' - one OrderedDict per organization, sorted by organization
            name, holding 'organization_title', 'organization_name',
            'total_stars', 'average_stars' and the entries produced by
            jsonify_counter() for that organization's score counts
        'total_score_counts' - jsonify_counter() of the score counts
            summed over all organizations (sub-organizations are never
            added twice here)
        'num_packages_scored' - sum of all the values in those counts
        'num_packages' - number of active packages

    Packages for which 'qa_package_openness_show' raises ObjectNotFound
    are logged and left out of the counts.
    '''
    context = {'model': model, 'session': model.Session, 'ignore_auth': True}
    openness_show = p.toolkit.get_action('qa_package_openness_show')
    total_score_counts = Counter()
    counts = {}

    # Get all the scores and build up the results by org
    organizations = model.Session.query(model.Group)\
        .filter(model.Group.type == 'organization')\
        .filter(model.Group.state == 'active').all()
    for org in organizations:
        scores = []
        for pkg in org.packages():
            try:
                qa = openness_show(context, {'id': pkg.id})
            except p.toolkit.ObjectNotFound:
                log.warning('No QA info for package %s', pkg.name)
                # Skip just this package; one package without QA info
                # must not abort the whole report.
                continue
            scores.append(qa['openness_score'])
        score_counts = Counter(scores)
        total_score_counts += score_counts
        counts[org.name] = {
            'organization_title': org.title,
            'score_counts': score_counts,
        }

    if include_sub_organizations:
        # Work on a copy so each organization gains its descendants' own
        # counts (read from the untouched `counts`) exactly once.
        results = copy.deepcopy(counts)
        for org_name in results:
            org = model.Group.by_name(org_name)
            for sub_org_id, sub_org_name, sub_org_title, sub_org_parent_id \
                    in org.get_children_group_hierarchy(type='organization'):
                if sub_org_name not in counts:
                    # occurs only if there is an organization created
                    # since the last loop?
                    continue
                results[org_name]['score_counts'] += \
                    counts[sub_org_name]['score_counts']
    else:
        results = counts

    table = []
    for org_name in sorted(results):
        org_counts = results[org_name]
        score_counts = org_counts['score_counts']
        # A score of None or 0 contributes no stars.
        total_stars = sum(k * v for k, v in score_counts.items() if k)
        # A score of 0 counts as scored; a score of None does not.
        num_pkgs_scored = sum(v for k, v in score_counts.items()
                              if k is not None)
        average_stars = round(float(total_stars) / num_pkgs_scored, 1) \
            if num_pkgs_scored else 0.0
        row = OrderedDict((
            ('organization_title', org_counts['organization_title']),
            ('organization_name', org_name),
            ('total_stars', total_stars),
            ('average_stars', average_stars),
        ))
        row.update(jsonify_counter(score_counts))
        table.append(row)

    # Get total number of active packages
    num_packages = model.Session.query(model.Package)\
        .filter_by(state='active')\
        .count()

    return {
        'table': table,
        'total_score_counts': jsonify_counter(total_score_counts),
        'num_packages_scored': sum(total_score_counts.values()),
        'num_packages': num_packages,
    }