Example #1
0
def dgu_group_update(context, data_dict):
    """
    Group edit permission.  Checks that a valid user is supplied and that the
    user is a sysadmin, or is a member (with a capacity of admin) of the
    group or of one of the groups yielded by ``publib.go_up_tree`` for it.

    :param context: auth context; must contain 'model' and may contain
        'user' (the name of the requesting user).
    :param data_dict: passed to ``get_group_object`` to identify the group.
    :returns: ``{'success': True}`` when authorized, otherwise
        ``{'success': False, 'msg': <reason>}``.
    """
    model = context['model']
    user = context.get('user', '')
    group = get_group_object(context, data_dict)

    if not user:
        return {'success': False, 'msg': _('Only members of this group are authorized to edit this group')}

    # Sys admins should be allowed to update groups
    if Authorizer().is_sysadmin(unicode(user)):
        return {'success': True}

    # Everyone else needs a user record so group memberships can be checked
    user_obj = model.User.get(user)
    if not user_obj:
        # Interpolate the name itself rather than str(user): under Python 2
        # str() on a non-ASCII unicode name raises UnicodeEncodeError.
        return {'success': False, 'msg': _('Could not find user %s') % (user,)}

    parent_groups = list(publib.go_up_tree(group))

    # Check if user is an admin of a parent group, and if so allow them to edit.
    if _groups_intersect(user_obj.get_groups('publisher', 'admin'), parent_groups):
        return {'success': True}

    # Check admin of just this group
    if _groups_intersect(user_obj.get_groups('publisher', 'admin'), [group]):
        return {'success': True}

    return {'success': False, 'msg': _('User %s not authorized to edit this group') % (user,)}
Example #2
0
def app_dataset_report():
    """
    Report the datasets that are linked to a related item of type 'App'.

    :returns: ``{'table': rows}`` where each row is an OrderedDict describing
        one app/dataset link, with the dataset's organization and the last
        organization yielded by ``go_up_tree`` for it (reported as the
        top-level organization). Rows are sorted by top-level organization
        title followed by organization title.
    """
    # Join through the relationship so that the type filter applies to the
    # Related row belonging to each RelatedDataset. Filtering on
    # model.Related without a join selects from both tables with nothing
    # linking them, so links to non-App items were returned as well.
    app_links = model.Session.query(model.RelatedDataset) \
                     .join(model.RelatedDataset.related) \
                     .filter(model.Related.type == 'App') \
                     .all()

    app_dataset_dicts = []
    for related in app_links:
        dataset = related.dataset
        org = dataset.get_organization()
        top_org = list(go_up_tree(org))[-1]

        app_dataset_dict = OrderedDict((
            ('app title', related.related.title),
            ('app url', related.related.url),
            ('dataset name', dataset.name),
            ('dataset title', dataset.title),
            ('organization title', org.title),
            ('organization name', org.name),
            ('top-level organization title', top_org.title),
            ('top-level organization name', top_org.name),
            ('dataset theme', dataset.extras.get('theme-primary', '')),
            ('dataset notes', lib.dataset_notes(dataset)),
            ))
        app_dataset_dicts.append(app_dataset_dict)

    # The sort key is the two titles concatenated (kept as in the original)
    app_dataset_dicts.sort(key=lambda row: row['top-level organization title']
                           + row['organization title'])

    return {'table': app_dataset_dicts}
Example #3
0
    def _send_application(self, group, reason):
        """
        Email the current user's request for publisher access to an admin.

        The recipients are the 'admin' members of the first publisher
        yielded by ``go_up_tree(group)`` that has any. If no publisher has
        admins, the request goes to the address configured as
        ``dgu.admin.email`` instead.

        :param group: the publisher that the user (``c.user``) is applying
            to join.
        :param reason: the reason supplied by the applicant; must be
            non-empty.
        :returns: the result of ``self.apply(...)`` carrying the errors when
            the reason is missing, when no recipient can be determined, or
            when sending the email fails.
        """
        from ckan.logic.action import error_summary
        from ckan.lib.mailer import mail_recipient
        from genshi.template.text import NewTextTemplate
        from pylons import config

        if not reason:
            h.flash_error(_("There was a problem with your submission, \
                             please correct it and try again"))
            errors = {"reason": ["No reason was supplied"]}
            return self.apply(group.id, errors=errors,
                              error_summary=error_summary(errors))

        # look for publisher admins up the tree
        recipients = []
        recipient_publisher = None
        for publisher in go_up_tree(group):
            admins = publisher.members_of_type(model.User, 'admin').all()
            if admins:
                recipients = [(u.fullname, u.email) for u in admins]
                recipient_publisher = publisher.title
                break
        if not recipients:
            if not config.get('dgu.admin.email'):
                log.error('User "%s" prevented from applying for publisher access for "%s" '
                          'because: dgu.admin.email is not setup in CKAN config.',
                          c.user, group.name)
                h.flash_error(_("There is a problem with the system configuration"))
                errors = {"reason": ["%s does not have an administrator user to contact" % group.name]}
                # NOTE: this call used to pass data=data, but no `data` name
                # is defined in this method, so the branch raised NameError
                # instead of re-displaying the form. It now matches the
                # other self.apply() calls in this method.
                return self.apply(group.id, errors=errors,
                                  error_summary=error_summary(errors))
            # Fall back to the site-wide admin contact
            recipients = [(config.get('dgu.admin.name', "DGU Admin"),
                           config['dgu.admin.email'])]
            recipient_publisher = 'data.gov.uk admin'

        log.debug('User "%s" requested publisher access for "%s" which was sent to admin %s (%r) with reason: %r',
                  c.user, group.name, recipient_publisher, recipients, reason)
        extra_vars = {
            'group'    : group,
            'requester': c.userobj,
            'reason'   : reason
        }
        email_msg = render("email/join_publisher_request.txt", extra_vars=extra_vars,
                           loader_class=NewTextTemplate)

        try:
            for (name, recipient) in recipients:
                mail_recipient(name,
                               recipient,
                               "Publisher request",
                               email_msg)
        except Exception as e:
            # Deliberately broad: any failure while mailing is reported to
            # the user as a configuration problem and logged with details.
            h.flash_error(_("There is a problem with the system configuration"))
            errors = {"reason": ["No mail server was found"]}
            log.error('User "%s" prevented from applying for publisher access for "%s" because of mail configuration error: %s',
                      c.user, group.name, e)
            return self.apply(group.id, errors=errors,
                              error_summary=error_summary(errors))
Example #4
0
def html_datasets_report():
    '''
    Returns datasets that only have an HTML link, by organization.

    Only active packages are considered and those whose 'unpublished' extra
    is true are skipped. Resources of type 'documentation' are ignored, and
    the 'asp' and blank formats are disregarded when deciding whether HTML
    is the only format a dataset has.

    :returns: dict with 'table' (one OrderedDict per organization, the
        organization with the most HTML-only datasets first),
        'num_datasets_published' and 'num_datasets_only_html'.
    '''
    # Get packages
    pkgs = model.Session.query(model.Package)\
                .filter_by(state='active')

    # See if HTML
    num_datasets_published = 0
    num_datasets_only_html = 0
    datasets_by_publisher_only_html = collections.defaultdict(list)
    # use yield_per, otherwise memory use just goes up til the script is killed
    # by the os.
    for pkg in pkgs.yield_per(100):
        if p.toolkit.asbool(pkg.extras.get('unpublished')):
            continue
        num_datasets_published += 1

        # A resource without a format (None) counts as a blank format.
        # Calling .lower() on None directly would raise AttributeError.
        formats = set([
            (res.format or '').lower() for res in pkg.resources
            if res.resource_type != 'documentation'
        ])
        if 'html' not in formats:
            continue
        org = pkg.get_organization().name

        data_formats = formats - set(('asp', '', None))
        if data_formats == set(('html', )):
            num_datasets_only_html += 1
            datasets_by_publisher_only_html[org].append((pkg.name, pkg.title))

    rows = []
    # Most HTML-only datasets first
    for org_name, datasets_only_html in sorted(
            datasets_by_publisher_only_html.items(),
            key=lambda x: -len(x[1])):
        org = model.Session.query(model.Group) \
                   .filter_by(name=org_name) \
                   .first()
        top_org = list(go_up_tree(org))[-1]

        row = OrderedDict((
            ('organization title', org.title),
            ('organization name', org.name),
            ('top-level organization title', top_org.title),
            ('top-level organization name', top_org.name),
            ('num datasets only html', len(datasets_only_html)),
            ('name datasets only html', ' '.join(d[0]
                                                 for d in datasets_only_html)),
            ('title datasets only html',
             '|'.join(d[1] for d in datasets_only_html)),
        ))
        rows.append(row)

    return {
        'table': rows,
        'num_datasets_published': num_datasets_published,
        'num_datasets_only_html': num_datasets_only_html,
    }
Example #5
0
def html_datasets_report():
    '''
    Returns datasets that only have an HTML link, by organization.

    Active packages whose 'unpublished' extra is not true are counted as
    published. A published dataset counts as "only HTML" when, ignoring
    'documentation' resources and the 'asp' and blank formats, 'html' is
    the only resource format it has.

    :returns: dict with keys 'table' (a list of OrderedDict rows, one per
        organization, ordered by descending number of HTML-only datasets),
        'num_datasets_published' and 'num_datasets_only_html'.
    '''
    # Get packages
    pkgs = model.Session.query(model.Package)\
                .filter_by(state='active')

    # See if HTML
    num_datasets_published = 0
    num_datasets_only_html = 0
    datasets_by_publisher_only_html = collections.defaultdict(list)
    # use yield_per, otherwise memory use just goes up til the script is killed
    # by the os.
    for pkg in pkgs.yield_per(100):
        if p.toolkit.asbool(pkg.extras.get('unpublished')):
            continue
        num_datasets_published += 1

        # Normalise a missing format (None) to '' before lower-casing;
        # res.format.lower() raises AttributeError when format is None.
        formats = set([(res.format or '').lower() for res in pkg.resources
                       if res.resource_type != 'documentation'])
        if 'html' not in formats:
            continue
        org = pkg.get_organization().name

        data_formats = formats - set(('asp', '', None))
        if data_formats == set(('html',)):
            num_datasets_only_html += 1
            datasets_by_publisher_only_html[org].append((pkg.name, pkg.title))

    rows = []
    # Organizations with the most HTML-only datasets come first
    for org_name, datasets_only_html in sorted(
            datasets_by_publisher_only_html.items(),
            key=lambda x: -len(x[1])):
        org = model.Session.query(model.Group) \
                   .filter_by(name=org_name) \
                   .first()
        top_org = list(go_up_tree(org))[-1]

        row = OrderedDict((
            ('organization title', org.title),
            ('organization name', org.name),
            ('top-level organization title', top_org.title),
            ('top-level organization name', top_org.name),
            ('num datasets only html', len(datasets_only_html)),
            ('name datasets only html',
             ' '.join(d[0] for d in datasets_only_html)),
            ('title datasets only html',
             '|'.join(d[1] for d in datasets_only_html)),
            ))
        rows.append(row)

    return {'table': rows,
            'num_datasets_published': num_datasets_published,
            'num_datasets_only_html': num_datasets_only_html,
            }
Example #6
0
def user_is_rm(user, org=None):
    """
    Tell whether `user` is configured as a relationship manager.

    The 'dgu.relationship_managers' config option is read as a Python
    literal: a dict keyed by user name whose values hold organization names.

    :param user: object with a ``name`` attribute.
    :param org: optional organization. When given, the user must be listed
        for it or for one of the organizations yielded by ``go_up_tree``.
    :returns: True or False. Without `org`, True when the user is listed
        for at least one organization.
    """
    from pylons import config
    from ast import literal_eval
    from ckanext.dgu.lib.publisher import go_up_tree

    rm_setting = config.get('dgu.relationship_managers', '{}')
    managed_orgs = literal_eval(rm_setting).get(user.name, [])

    if not org:
        # Are they RM of any org?
        return len(managed_orgs) > 0

    return any(candidate.name in managed_orgs
               for candidate in go_up_tree(org))
Example #7
0
def dgu_package_update(context, data_dict):
    """
    Package edit permission.

    Sysadmins may edit any package. Packages whose 'UKLP' extra is 'True'
    or whose 'external_reference' extra is 'ONSHUB' may only be edited by
    sysadmins. Otherwise the user must be a member of the package's
    publisher, or an admin of one of the publishers yielded by
    ``publib.go_up_tree`` for it.

    :param context: auth context; must contain 'model' and may contain
        'user' (the name of the requesting user).
    :param data_dict: passed to ``get_package_object`` to identify the
        package.
    :returns: ``{'success': True}`` when authorized, otherwise
        ``{'success': False, 'msg': <reason>}``.
    """
    model = context['model']
    user = context.get('user')
    user_obj = model.User.get(user)
    package = get_package_object(context, data_dict)

    if Authorizer().is_sysadmin(user_obj):
        return {'success': True}

    # Built up front for every non-sysadmin, so it must not be able to
    # raise: interpolate the name itself rather than str(user), which fails
    # with UnicodeEncodeError on a non-ASCII unicode name under Python 2.
    fail = {'success': False,
            'msg': _('User %s not authorized to edit packages in these groups') % (user,)}

    # Only sysadmins can edit UKLP packages.
    # Note: the harvest user *is* a sysadmin
    # Note: if changing this, check the code and comments in
    #       ckanext/forms/dataset_form.py:DatasetForm.form_to_db_schema_options()
    if package.extras.get('UKLP', '') == 'True':
        return fail

    # Only sysadmins can edit ONS packages.
    # Note: the dgu user *is* a sysadmin
    if package.extras.get('external_reference') == 'ONSHUB':
        return fail

    # To be able to edit this dataset the user is allowed if
    # (they are an 'editor' for this publisher) OR
    # (an admin for this publisher OR parent publishers).
    if not user_obj:
        return fail

    package_group = package.get_groups('publisher')
    parent_groups = list(publib.go_up_tree(package_group[0])) if package_group else []

    # Check admin of this or parent groups.
    if _groups_intersect(user_obj.get_groups('publisher', 'admin'), parent_groups):
        return {'success': True}

    # Check admin or editor of just this group
    if _groups_intersect(user_obj.get_groups('publisher'), package_group):
        return {'success': True}

    return fail
Example #8
0
def dgu_dataset_delete(context, data_dict):
    """
    Decide whether the requesting user may set a dataset's state to
    "deleted".

    Sysadmins always may. Anyone else may only do so for a dataset whose
    'UKLP' extra is 'True', and only if they are a member of the dataset's
    publisher or an admin of one of the publishers yielded by
    ``publib.go_up_tree`` for it.

    :param context: auth context; must contain 'model' and may contain
        'user' (the name of the requesting user).
    :param data_dict: passed to ``get_package_object`` to identify the
        dataset.
    :returns: ``{'success': True}`` or ``{'success': False}``.
    """
    model = context['model']
    username = context.get('user')
    if not username:
        # Anonymous requests are refused before anything is looked up
        return {'success': False}

    account = model.User.get(username)
    dataset = get_package_object(context, data_dict)

    if Authorizer().is_sysadmin(account):
        return {'success': True}

    # Non-sysadmins are limited to UKLP datasets and need a user record
    if dataset.extras.get('UKLP', '') != 'True' or not account:
        return {'success': False}

    publishers = dataset.get_groups('publisher')
    if publishers:
        publisher_tree = list(publib.go_up_tree(publishers[0]))
    else:
        publisher_tree = []

    # Admin of this or a parent publisher, else admin or editor of just
    # this publisher
    if (_groups_intersect(account.get_groups('publisher', 'admin'),
                          publisher_tree)
            or _groups_intersect(account.get_groups('publisher'),
                                 publishers)):
        return {'success': True}

    return {'success': False}
def publisher_report(metric):
    """
    Build a per-organization table of usage counts for each period.

    :param metric: 'views', 'visits', 'downloads' or 'viewsdownloads' (page
        views with downloads added on). Any other value gives an empty
        table.
    :returns: dict with 'table' (one OrderedDict per active organization
        that has counts, largest 'All' count first) and 'all periods' (the
        distinct GA_Url period names, ordered by name).
    """
    active_orgs = dict(model.Session.query(model.Group.name, model.Group)\
                            .filter_by(state='active').all())

    org_counts = collections.defaultdict(dict)

    if metric in ('views', 'viewsdownloads', 'visits'):
        # 'visits' sums visits; the other two metrics sum page views
        if metric == 'visits':
            sql_function = 'sum(visits::int)'
        else:
            sql_function = 'sum(pageviews::int)'
        q = '''
            select department_id, period_name, %s metric
            from ga_url
            where department_id <> ''
            and package_id <> ''
            group by department_id, period_name
            order by department_id
        ''' % sql_function

        for org_name, period_name, count in \
                model.Session.connection().execute(q):
            org_counts[org_name][period_name] = count

    if metric in ('downloads', 'viewsdownloads'):
        q = '''
            select g.name as org_name, s.period_name, sum(s.value::int) as downloads
            from GA_Stat as s
            join Package as p on s.key=p.name
            join "group" as g on p.owner_org=g.id
            where stat_name='Downloads'
            and g.state='active'
            group by org_name, s.period_name
            order by downloads desc;
            '''
        # For 'viewsdownloads' the downloads are added onto the view counts
        # collected above; for 'downloads' they are the counts.
        add_to_existing = (metric == 'viewsdownloads')
        for org_name, period_name, count in \
                model.Session.connection().execute(q):
            period_counts = org_counts[org_name]
            if add_to_existing:
                period_counts[period_name] = \
                    count + period_counts.get(period_name, 0)
            else:
                period_counts[period_name] = count
            period_counts['All'] = count + period_counts.get('All', 0)

    # Largest 'All' count first; a reversed sort is still stable, so ties
    # stay in their existing relative order
    ranked_counts = sorted(org_counts.items(),
                           key=lambda item: item[1].get('All', 0),
                           reverse=True)

    period_query = model.Session.query(GA_Url.period_name) \
                        .group_by(GA_Url.period_name) \
                        .order_by(GA_Url.period_name)
    all_periods = [period_row[0] for period_row in period_query.all()]

    rows = []
    for org_name, counts in ranked_counts:
        org = active_orgs.get(org_name)
        if not org:
            # counts recorded against an org that is not an active group
            continue
        top_org = list(go_up_tree(org))[-1]

        columns = [
            ('organization title', org.title),
            ('organization name', org.name),
            ('top-level organization title', top_org.title),
            ('top-level organization name', top_org.name),
        ]
        columns.extend((period_name, counts.get(period_name, 0))
                       for period_name in all_periods)
        rows.append(OrderedDict(columns))

    # Group the periods by year, to help the template draw the table nicely
    #all_periods_tuples = [period.split('-') for period in all_periods
    #                      if '-' in period]
    #all_periods_tuples.sort(key=lambda x: x[0])
    #all_periods_by_year = [
    #    (year, [p for y, p in year_periods])
    #    for year, year_periods in groupby(all_periods_tuples, lambda x: x[0])]

    report = {'table': rows}
    report['all periods'] = all_periods
    #report['all periods by year'] = all_periods_by_year
    return report
Example #10
0
def publisher_report(metric):
    """
    Report usage counts per organization and period.

    :param metric: selects what is counted: 'views' (page views), 'visits',
        'downloads', or 'viewsdownloads' (page views plus downloads). With
        any other value no counts are collected and the table is empty.
    :returns: ``{'table': rows, 'all periods': period_names}``. Each row is
        an OrderedDict for one active organization, holding its titles and
        names followed by one column per period. Rows are ordered by
        descending 'All' count.
    """
    orgs_by_name = dict(model.Session.query(model.Group.name, model.Group)\
                             .filter_by(state='active').all())

    wants_views = metric in ('views', 'viewsdownloads', 'visits')
    wants_downloads = metric in ('downloads', 'viewsdownloads')

    org_counts = collections.defaultdict(dict)
    if wants_views:
        sql_function = ('sum(visits::int)' if metric == 'visits'
                        else 'sum(pageviews::int)')
        q = '''
            select department_id, period_name, %s metric
            from ga_url
            where department_id <> ''
            and package_id <> ''
            group by department_id, period_name
            order by department_id
        ''' % sql_function

        view_rows = model.Session.connection().execute(q)
        for org_name, period_name, count in view_rows:
            org_counts[org_name][period_name] = count

    if wants_downloads:
        q = '''
            select g.name as org_name, s.period_name, sum(s.value::int) as downloads
            from GA_Stat as s
            join Package as p on s.key=p.name
            join "group" as g on p.owner_org=g.id
            where stat_name='Downloads'
            and g.state='active'
            group by org_name, s.period_name
            order by downloads desc;
            '''
        download_rows = model.Session.connection().execute(q)

        for org_name, period_name, count in download_rows:
            counts_for_org = org_counts[org_name]
            if metric == 'viewsdownloads':
                # add it onto the existing counts
                counts_for_org[period_name] = \
                    count + counts_for_org.get(period_name, 0)
            else:
                counts_for_org[period_name] = count
            # keep a running total in the 'All' column
            counts_for_org['All'] = count + counts_for_org.get('All', 0)

    # Order organizations by their 'All' count, largest first
    ranked = list(org_counts.items())
    ranked.sort(key=lambda pair: -pair[1].get('All', 0))

    all_periods = []
    for period_row in model.Session.query(GA_Url.period_name).group_by(
            GA_Url.period_name).order_by(GA_Url.period_name).all():
        all_periods.append(period_row[0])

    rows = []
    for org_name, counts in ranked:
        org = orgs_by_name.get(org_name)
        if org:
            top_org = list(go_up_tree(org))[-1]

            row = OrderedDict((
                ('organization title', org.title),
                ('organization name', org.name),
                ('top-level organization title', top_org.title),
                ('top-level organization name', top_org.name),
            ))
            # a period with no recorded count shows as 0
            for period_name in all_periods:
                row[period_name] = counts.get(period_name, 0)
            rows.append(row)

    # Group the periods by year, to help the template draw the table nicely
    #all_periods_tuples = [period.split('-') for period in all_periods
    #                      if '-' in period]
    #all_periods_tuples.sort(key=lambda x: x[0])
    #all_periods_by_year = [
    #    (year, [p for y, p in year_periods])
    #    for year, year_periods in groupby(all_periods_tuples, lambda x: x[0])]

    return {
        'table': rows,
        'all periods': all_periods,
        #'all periods by year': all_periods_by_year
    }