def identify_resource(resource):

    # With resource_dictize we get the correct resource url
    # even if the dataset is in draft state

    task_id = make_uuid()

    resource_dict = resource_dictize(resource, {'model': model})
    context = _make_default_context()
    # Formerly dispatched via Celery:
    #   celery.send_task('vectorstorer.identify',
    #                    args=[resource_dict, context],
    #                    countdown=15, task_id=task_id)
    # ckan.lib.jobs.enqueue takes the argument list directly and has no
    # celery-style countdown (see the sketch after this function).
    jobs.enqueue('vectorstorer.identify', [resource_dict, context])

    res_identify = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    if res_identify:
        # This is when a user had previously rejected the ingestion workflow,
        # but now wants to re-identify the resource
        model.Session.delete(res_identify)
        new_res_identify = ResourceIngest(task_id, resource.id,
                                          ResourceStorerType.VECTOR)
        model.Session.add(new_res_identify)
        model.Session.commit()
    else:
        # A newly created/updated resource needs to be identified
        new_res_identify = ResourceIngest(task_id, resource.id,
                                          ResourceStorerType.VECTOR)
        model.Session.add(new_res_identify)
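
Since ckan.lib.jobs.enqueue has no celery-style countdown, a delayed start can be approximated through the underlying RQ queue. A minimal sketch, assuming rq >= 1.2 with the worker's scheduler enabled, and that the 'vectorstorer.identify' task path is importable by the worker:

from datetime import timedelta

from ckan.lib.jobs import get_queue


def enqueue_identify_delayed(resource_dict, context):
    # Roughly equivalent to the old celery countdown=15: start the job
    # about 15 seconds after it is scheduled.
    queue = get_queue()
    queue.enqueue_in(timedelta(seconds=15), 'vectorstorer.identify',
                     resource_dict, context)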
def delete_vector_storer_task(resource, pkg_delete=False):
    user = _get_site_user()
    data = json.dumps(resource)
    resource_list_to_delete = None
    if (resource['format'] == settings.WMS_FORMAT
            or resource['format'] == settings.DB_TABLE_FORMAT
        ) and 'vectorstorer_resource' in resource:
        if pkg_delete:
            resource_list_to_delete = _get_child_resources(resource)
    else:
        resource_list_to_delete = _get_child_resources(resource)
    context = json.dumps({
        'resource_list_to_delete': resource_list_to_delete,
        'site_url': _get_site_url(),
        'apikey': user.get('apikey'),
        'site_user_apikey': user.get('apikey'),
        'user': user.get('name'),
        'db_params': config['ckan.datastore.write_url']
    })
    geoserver_context = _get_geoserver_context()
    jobs.enqueue(tasks.vectorstorer_delete, [geoserver_context, context, data])
    if 'vectorstorer_resource' in resource and not pkg_delete:
        _delete_child_resources(resource)
Example n. 3
def enqueue_update_datajson_cache_tasks():
    # The RQ functions we use require a context to be passed in, to avoid authorization problems
    try:
        context = {'model': model, 'session': model.Session, 'user': c.user}
        delete.job_clear(context, {'queues': [ANDINO_DATAJSON_QUEUE]})
    except logic.NotAuthorized:
        logger.info(u'User %s is not authorized to manage job queues. Cannot clear the queues before updating data.json.', c.user)
    jobs.enqueue(update_datajson_cache, queue=ANDINO_DATAJSON_QUEUE)
    jobs.enqueue(update_catalog, queue=ANDINO_DATAJSON_QUEUE)
Example n. 4
def compat_enqueue(name, fn, args=None):
    u'''
    Enqueue a background job using Celery or RQ.
    '''
    try:
        # Try to use RQ
        from ckan.lib.jobs import enqueue
        enqueue(fn, args=args)
    except ImportError:
        # Fallback to Celery
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args)
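
A usage sketch for the helper above: the same call enqueues via RQ where ckan.lib.jobs is importable and falls back to Celery otherwise. Both the task name 'myext.process_resource' and the process_resource function are hypothetical.

def process_resource(resource_id):
    pass  # the actual background work would go here

compat_enqueue('myext.process_resource', process_resource,
               args=['some-resource-id'])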
def create_delete_resource_task(resource):
    """
    Creates the celery task for raster resource deletion
    :param resource: the resource to be deleted
    """
    context = _make_default_context()
    context['resource_dict'] = resource
    task_id = make_uuid()
    #celery.send_task(
    #    'rasterstorer.delete',
    #    args=[context],
    #    task_id=task_id
    #)
    jobs.enqueue('rasterstorer.delete', [context])
def create_ingest_resource_task(resource):
    """
    Creates the celery task for raster resource ingestion
    :param resource: the resource to be ingested
    """
    task_id = make_uuid()
    context = _make_default_context()
    resource_dict = resource.as_dict()
    context['resource_dict'] = resource_dict
    #celery.send_task(
    #    'rasterstorer.import',
    #    args=[context],
    #    task_id=task_id
    #)
    jobs.enqueue('rasterstorer.import', [context])
Example n. 7
 def enqueue(self, job=None, *args, **kwargs):
     u'''
     Enqueue a test job.
     '''
     if job is None:
         job = jobs.test_job
     return jobs.enqueue(job, *args, **kwargs)
Example n. 9
def convert_json_state_to_rdf(id, resource_id):
    """
    This creates json-stat to rdf conversion as background jobs and appropriate message will appear.
    Main conversion module is in the file controllers -> jsonstatToRDF.py file
    """
    datasetid = toolkit.request.params.get(
        'datasetId', u'') or toolkit.request.form.get('datasetId', u'')
    vocabulary_namespace = toolkit.request.params.get(
        'VocabNmSpace', u'') or toolkit.request.form.get('VocabNmSpace', u'')
    data_namespace = toolkit.request.params.get(
        'DataNmSpace', u'') or toolkit.request.form.get('DataNmSpace', u'')

    job = jobs.enqueue(
        RdfConv.convertToRDF,
        [resource_id, datasetid, vocabulary_namespace, data_namespace, id])
    task_id = job.id

    return {
        "message": toolkit._(
            'RDF file being created. Please visit the dataset page after a '
            'few minutes. If you don\'t see the RDF file after a while, '
            'please contact the administrator along with the Job id: '
        ) + task_id,
        "id": id
    }
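
A hedged follow-up: the reported Job id can later be checked against the queue via CKAN's job_show action (part of the background jobs API since CKAN 2.7). The ignore_auth context is an assumption for a server-side status check.

from ckan.plugins import toolkit


def rdf_job_status(task_id):
    # Returns the queued job's metadata (id, title, created, queue).
    return toolkit.get_action('job_show')({'ignore_auth': True},
                                          {'id': task_id})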
def update_vector_storer_task(resource):
    user = _get_site_user()
    resource_package_id = resource.as_dict()['package_id']
    resource_list_to_delete = _get_child_resources(resource.as_dict())
    context = json.dumps({
        'resource_list_to_delete': resource_list_to_delete,
        'package_id': resource_package_id,
        'site_url': _get_site_url(),
        'apikey': user.get('apikey'),
        'site_user_apikey': user.get('apikey'),
        'user': user.get('name'),
        'db_params': config['ckan.datastore.write_url']
    })
    geoserver_context = _get_geoserver_context()
    data = json.dumps(resource_dictize(resource, {'model': model}))
    log.debug('update vectorstore task')
    jobs.enqueue(tasks.vectorstorer_update, [geoserver_context, context, data])
def update_ingest_resource(resource):
    package_id = resource.as_dict()['package_id']
    resource_list_to_delete = _get_child_resources(resource.as_dict())
    context = _make_default_context()
    context.update({
        'resource_list_to_delete': resource_list_to_delete,
        'package_id': package_id,
        'db_params': config['ckan.datastore.write_url'],
    })
    backend_context = _make_backend_context()
    resource_dict = resource_dictize(resource, {'model': model})
    task_id = make_uuid()
    # Formerly dispatched via Celery:
    #   celery.send_task('vectorstorer.update',
    #                    args=[resource_dict, context, backend_context],
    #                    task_id=task_id)
    jobs.enqueue('vectorstorer.update',
                 [resource_dict, context, backend_context])
def create_identify_resource_task(resource):
    """
    Creates the celery task to identify the resource
    :param resource: the resource to be identified
    """

    task_id = make_uuid()
    
    # We use resource_dictize() just to force CKAN to provide an absolute url.
    # Note: a cleaner way to achieve this might be to call something like
    # url_for(controller='package', action='resource_download', id=package_id, resource_id=resource_id)
    package_id = resource.as_dict()['package_id']
    resource_dict = resource_dictize(resource, {'model': model})
    resource_dict['package_id'] = package_id
    
    context = _make_default_context()
    context['resource_dict'] = resource_dict
    #celery.send_task(
    #    'rasterstorer.identify',
    #    args=[context],
    #    task_id=task_id
    #)
    jobs.enqueue('rasterstorer.identify', [context])

    res_identify = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    if res_identify:
        # This is when a user had previously rejected the ingestion workflow,
        # but now wants to re-identify the resource
        model.Session.delete(res_identify)
        new_res_identify = ResourceIngest(
            task_id,
            resource.id,
            ResourceStorerType.RASTER
        )
        model.Session.add(new_res_identify)
        model.Session.commit()
    else:
        # A newly created/updated resource needs to be identified
        new_res_identify = ResourceIngest(
            task_id,
            resource.id,
            ResourceStorerType.RASTER
        )
        model.Session.add(new_res_identify)
Example n. 13
 def thread_show(self, slug, id):
     thread = Thread.get_by_id(id=id)
     if not thread:
         abort(404)
     if not thread.active and (not c.userobj or not c.userobj.sysadmin):
         abort(404)
     form = CreatePostForm(tk.request.POST)
     if tk.request.POST:
         if c.userobj is None:
             tk.redirect_to(tk.url_for(controller='user', action='login'))
         if BannedUser.check_by_id(c.userobj):
             flash_error(tk._('You are banned'))
             tk.redirect_to(thread.get_absolute_url())
         if form.validate():
             post = tk.get_action('forum_create_post')(
                 {
                     'auth_user_obj': c.userobj
                 }, {
                     'thread_id': id,
                     'content': form.data['content']
                 })
             if post:
                 jobs.enqueue(
                     send_notifications_on_new_post,
                     [post, tk.request.environ.get('CKAN_LANG')])
                 flash_success(tk._('You successfully created the comment'))
             else:
                 flash_error(tk._('Thread is closed for comments'))
             return tk.redirect_to(thread.get_absolute_url())
         else:
             flash_error(tk._('You have errors in form'))
     page = get_page_number(tk.request.params)
     total_rows = Post.filter_thread(thread.id).count()
     # ceiling division so a final partial page is counted exactly once
     total_pages = -(-total_rows // self.paginated_by) or 1
     if not 0 < page <= total_pages:
         # redirect away to drop the out-of-range page parameter
         tk.redirect_to('forum_index')
     posts_list = Post.filter_thread(thread.id).offset(
         (page - 1) * self.paginated_by).limit(self.paginated_by)
     c.page = Page(collection=posts_list,
                   page=page,
                   item_count=total_rows,
                   items_per_page=self.paginated_by)
     context = {'thread': thread, 'form': form, 'posts': posts_list}
     return self.__render('thread.html', context)
Example n. 14
 def change_app_status(self, id, status):
     if c.userobj is None or not c.userobj.sysadmin:
         tk.redirect_to(tk.url_for(controller='user', action='login'))
     try:
         app_id = int(id)
     except ValueError:
         abort(404)
     app = App.get_by_id(id=app_id)
     if not app:
         abort(404)
     app.status = status
     app.closed_message = ""
     app.save()
     if app.status == 'active':
         jobs.enqueue(send_notifications_on_change_app_status,
                      [app, 'active',
                       tk.request.environ.get('CKAN_LANG')])
     tk.redirect_to(tk.url_for('apps_activity'))
def identify_resource(resource_obj):
    user_api_key = _get_site_user()['apikey']
    res_dict = resource_dictize(resource_obj, {'model': model})
    resource = resource_obj.as_dict()
    '''With resource_dictize we get the correct resource url even if dataset is in draft state   '''

    resource['url'] = res_dict['url']

    data = json.dumps(resource)
    job = jobs.enqueue(tasks.identify_resource, [data, user_api_key])
def create_vector_storer_task(resource, extra_params=None):
    user = _get_site_user()
    resource_package_id = resource.as_dict()['package_id']
    cont = {
        'package_id': resource_package_id,
        'site_url': _get_site_url(),
        'apikey': user.get('apikey'),
        'site_user_apikey': user.get('apikey'),
        'user': user.get('name'),
        'db_params': config['ckan.datastore.write_url']
    }
    if extra_params:
        cont.update(extra_params)

    context = json.dumps(cont)
    geoserver_context = _get_geoserver_context()
    data = json.dumps(resource_dictize(resource, {'model': model}))
    log.debug('create vectorstore task')
    jobs.enqueue(tasks.vectorstorer_upload, [geoserver_context, context, data])
Example n. 17
    def notify(self, entity, operation=None):
        context = {'model': model, 'ignore_auth': True, 'defer_commit': True}

        if isinstance(entity, model.Resource):
            if not operation:
                # This happens on IResourceURLChange, but I'm not sure whether
                # to make this into a webhook.
                return
            elif operation == DomainObjectOperation.new:
                topic = 'resource/create'
            elif operation == DomainObjectOperation.changed:
                topic = 'resource/update'
            elif operation == DomainObjectOperation.deleted:
                topic = 'resource/delete'
            else:
                return

        if isinstance(entity, model.Package):
            if operation == DomainObjectOperation.new:
                topic = 'dataset/create'

            elif operation == DomainObjectOperation.changed:
                topic = 'dataset/update'

            elif operation == DomainObjectOperation.deleted:
                topic = 'dataset/delete'

            else:
                return

        webhooks = db.Webhook.find(topic=topic)

        for hook in webhooks:
            resource = table_dictize(entity, context)
            webhook = table_dictize(hook, context)
            jobs.enqueue(tasks.notify_hooks,
                         [resource, webhook,
                          config.get('ckan.site_url')])
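
A minimal sketch of what the enqueued tasks.notify_hooks job might do; the real task is not part of this listing, and both the webhook's 'address' field and the use of the requests library are assumptions.

import json

import requests


def notify_hooks(resource, webhook, site_url):
    # Deliver the dictized entity that triggered the hook to the
    # subscriber's endpoint.
    payload = {'topic': webhook.get('topic'),
               'entity': resource,
               'site_url': site_url}
    requests.post(webhook['address'],
                  data=json.dumps(payload),
                  headers={'Content-Type': 'application/json'},
                  timeout=30)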
Example n. 18
 def app_add(self):
     if c.userobj is None:
         tk.redirect_to(
             tk.url_for(controller='user',
                        action='login',
                        came_from=full_current_url()))
     form = CreateAppForm(tk.request.POST)
     data_dict = clean_dict(
         dict_fns.unflatten(tuplize_dict(parse_params(tk.request.params))))
     upload = uploader.get_uploader('apps')
     if tk.request.POST:
         if form.validate():
             # Upload the image
             upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                                     'clear_upload')
             try:
                 upload.upload(uploader.get_max_image_size())
             except logic.ValidationError as err:
                 flash_error(err.error_dict['image_upload'][0])
             else:
                 app = App()
                 form.populate_obj(app)
                 app.author_id = c.userobj.id
                 app.content = strip_tags(app.content)
                 app.status = "pending"
                 app.image_url = data_dict.get('image_url')
                 app.save()
                 log.debug("App data is valid. Content: %s",
                           strip_tags(app.name))
                 flash_success(tk._('You successfully created the app'))
                 jobs.enqueue(
                     send_notifications_on_change_app_status,
                     [app, 'pending',
                      tk.request.environ.get('CKAN_LANG')])
                 tk.redirect_to(app.get_absolute_url())
         else:
             flash_error(tk._('You have errors in form'))
             log.info("Validate errors: %s", form.errors)
     context = {'form': form, 'active_boards': Board.filter_active()}
     log.debug('ForumController.thread_add context: %s', context)
     return self.__render('create_app.html', context)
Example n. 19
def broken_links():
    QUEUE_NAME = 'default'
    queue = jobs.get_queue(QUEUE_NAME)
    queue._default_timeout = 3600 * 24
    try:
        toolkit.check_access('sysadmin', {'user': g.user, 'model': model})
    except toolkit.NotAuthorized:
        return toolkit.abort(403)
    filepath = toolkit.config['spc.report.broken_links_filepath']
    try:
        last_check = datetime.fromtimestamp(os.stat(filepath).st_mtime)
    except OSError:
        last_check = None
    active_jobs_count = jobs.get_queue(QUEUE_NAME).count
    if request.method == 'POST':
        action = request.form.get('action')
        if action == 'download' and last_check:
            return send_file(
                filepath,
                as_attachment=True,
                attachment_filename='SPC-BrokenLinks-{:%Y-%m-%d}.csv'.format(
                    last_check))

        elif action == 'start':
            jobs.enqueue(broken_links_report,
                         kwargs={'recepients': [g.user]},
                         queue=QUEUE_NAME)
            h.flash_notice('Report generation in progress. '
                           'You will receive an email notification '
                           'as soon as the report is finished.')
            return h.redirect_to('spc_admin.broken_links')
    if active_jobs_count:
        h.flash_error('There are unfinished '
                      'report generation processes in progress. '
                      'You will not be able to manually start the checker '
                      'until they are finished.')
    extra_vars = {
        'last_check': last_check,
        'active_jobs_count': active_jobs_count
    }
    return toolkit.render('admin/broken_links.html', extra_vars)
def create_ingest_resource(resource, layer_params):
    package_id = resource.as_dict()['package_id']
    context = _make_default_context()
    context.update({
        'package_id': package_id,
        'db_params': config['ckan.datastore.write_url'],
        'layer_params': layer_params
    })
    backend_context = _make_backend_context()
    resource_dict = resource_dictize(resource, {'model': model})
    # Formerly dispatched via Celery with a fresh make_uuid() task id:
    #   celery.send_task('vectorstorer.upload',
    #                    args=[resource_dict, context, backend_context],
    #                    task_id=task_id)
    job = jobs.enqueue('vectorstorer.upload',
                       [resource_dict, context, backend_context])

    res_ingest = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    res_ingest.status = IngestStatus.PUBLISHED
    # Store the RQ job id so the record still points at a real task.
    res_ingest.celery_task_id = job.id
    model.Session.commit()
def test(queues):
    """Enqueue a test job. If no queue names are given then the job is
    added to the default queue. If queue names are given then a
    separate test job is added to each of the queues.

    """
    for queue in queues or [bg_jobs.DEFAULT_QUEUE_NAME]:
        job = bg_jobs.enqueue(bg_jobs.test_job, [u"A test job"],
                              title=u"A test job",
                              queue=queue)
        click.secho(
            u'Added test job {} to queue "{}"'.format(job.id, queue),
            fg=u"green",
        )
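
A usage sketch, assuming the click command above is registered under the ckan CLI's jobs command group (the wiring is not shown in this listing):

    ckan jobs test                  # one test job on the default queue
    ckan jobs test priority bulk    # one test job per named queue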
Example n. 22
 def close_app(self, id):
     if c.userobj is None or not c.userobj.sysadmin:
         tk.redirect_to(tk.url_for(controller='user', action='login'))
     app = App.get_by_id(id=id)
     if not app:
         tk.redirect_to(tk.url_for('apps_activity'))
     form = CloseAppForm(tk.request.POST)
     if tk.request.POST:
         if form.validate():
             form.populate_obj(app)
             app.status = "close"
             app.save()
             log.debug("Closed app")
             jobs.enqueue(
                 send_notifications_on_change_app_status,
                 [app, 'close',
                  tk.request.environ.get('CKAN_LANG')])
             flash_success(tk._('You successfully closed the app'))
             tk.redirect_to(tk.url_for('apps_activity'))
         else:
             flash_error(tk._('You have errors in form'))
             log.debug("Validate errors: %s", form.errors)
     context = {'form': form}
     return self.__render('close_app.html', context)
Example n. 23
def csv_upload_datasets():

    self = PublisherRecordsUpload()
    unauthorized = self._validate_users()
    if unauthorized:
        return unauthorized

    if request.method == "GET":
        return render('csv/upload.html')
    else:
        vars = dict()
        csv_file = request.files['file']

        try:
            _data = self._validate_csv_files(csv_file)
        except ValidationError as e:
            h.flash_error(_(e.error_dict['message']))
            return h.redirect_to('spreadsheet.csv_upload_datasets')

        vars['file_name'] = csv_file.filename
        data = io.BytesIO(_data)
        log.info("Reading CSV file for upload....")
        reader = csv.reader(data)
        columns = next(reader)
        tasks = list()

        for row_no, row in enumerate(reader):
            task = OrderedDict()
            row_dict = OrderedDict()

            # Encode each cell so the job payload is plain utf-8 bytes.
            for i, x in enumerate(row):
                row_dict[columns[i].encode('utf-8')] = x.encode('utf-8')

            task[u'title'] = u"No Title" if not row_dict.get(
                u'title', '') else row_dict.get(u'title')
            job = jobs.enqueue(records_upload_process, [
                json.dumps([row_dict], ensure_ascii=False).encode('utf-8'),
                c.user
            ])
            task[u'task_id'] = str(job.id)
            tasks.append(json.dumps(task))

        vars[u'tasks'] = tasks

        return render('csv/result.html', extra_vars=vars)
Example n. 24
    def convertToRDFJobs(self):
        """ This creates json-stat to rdf conversion as background jobs and appropriate message will appear."""

        resource_id = request.params.get('resource_id', u'')
        datasetid = request.params.get('datasetId', u'')
        vocabulary_namespace = request.params.get('VocabNmSpace', u'')
        data_namespace = request.params.get('DataNmSpace', u'')
        pkg_id = request.params.get('pkg_id', u'')

        job = jobs.enqueue(convertToRDF, [
            resource_id, datasetid, vocabulary_namespace, data_namespace,
            pkg_id
        ])
        task_id = job.id
        h.flash_notice(
            _('RDF being created; this may take a while. Please visit the '
              'dataset page after a few minutes. If you don\'t see the RDF '
              'file after a while, please contact the administrator along '
              'with the Job id: ') + task_id)
        tk.redirect_to(controller='package', action='read', id=pkg_id)
Example n. 25
    def run_archiver_after_package_create_update(package_id):
        """
        Run archiver after package update or package create.

        Note: There is no access control here. Be careful on where this is called from. This is the fix for
        Issue: https://github.com/IATI/ckanext-iati/issues/270

        :param package_id: str
        :return: None
        """

        if not package_id:
            log.error("No package id available. Cannot run the archiver.")
            return None

        job = jobs.enqueue(arch.run, [package_id, None, None])
        log.info("Triggered background job for package: {}".format(package_id))
        log.info("Job id: {}".format(job.id))

        return None
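
A hedged usage sketch: given the access-control caveat in the docstring, the helper would presumably be called from a trusted plugin hook such as IPackageController's after_create (the wiring below is hypothetical and not part of this listing).

class ArchiverHookSketch(p.SingletonPlugin):
    p.implements(p.IPackageController, inherit=True)

    def after_create(self, context, pkg_dict):
        # Trusted call site: the archiver runs without further checks.
        run_archiver_after_package_create_update(pkg_dict.get('id'))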
Example n. 26
def organization_update(context, data_dict):

    # Check if the state went from approval_needed to active so we can notify users
    old_org_dict = p.toolkit.get_action('organization_show')(
        {}, {
            'id': data_dict.get('id') or data_dict.get('name')
        })
    old_state = old_org_dict.get('state')

    new_org_dict = update_core.organization_update(context, data_dict)
    new_state = new_org_dict.get('state')

    if old_state == 'approval_needed' and new_state == 'active':
        # Notify users
        _send_activation_notification_email(context, new_org_dict)
        h.flash_success(
            'Publisher activated, a notification email has been sent to its administrators.'
        )

    old_org_name = old_org_dict.get('name', '')
    new_org_name = new_org_dict.get('name', '')

    # Only sysadmin is allowed to change the publisher id (no need of any check here)
    if old_org_name != new_org_name:
        log.info(
            "Organization name changed - updating package name in background job"
        )
        job = jobs.enqueue(
            publisher_tasks.update_organization_dataset_names,
            [old_org_name, new_org_name,
             new_org_dict.get('id', '')])
        log.info("Job id: {}".format(job.id))
        h.flash_success(
            'Please reload the page after some time to see the publisher id '
            'change reflected in all datasets.'
        )
    return new_org_dict
Example n. 27
def translate(timeout=180):
    script = S3_import_taxonomy_term_translations()
    job = jobs.enqueue(script.run, args=[], timeout=timeout)
    job_id = job.id
    message = "Job has been assigned!!"
    return {'job_id': job_id, 'message': message, 'timeout': timeout}
Example n. 28
 def broken_links_report(self):
     # The name below resolves to the module-level broken_links_report task;
     # function bodies do not consult class scope during name lookup.
     jobs.enqueue(broken_links_report, timeout=7200)
Example n. 29
    def post(self, view_type, id):
        """

        :param view_type:
        :param id:
        :return:
        """
        publisher_id = None
        package_id = None
        if view_type == "publisher":
            publisher_id = id
        else:
            package_id = id

        context = {
            u'model': model,
            u'session': model.Session,
            u'site_url': config.get('ckan.site_url'),
            u'user': config.get('iati.admin_user.name'),
            u'apikey': config.get('iati.admin_user.api_key'),
            u'api_version': 3,
        }

        if not c.user:
            p.toolkit.abort(
                403,
                'Permission denied, only system administrators can run the archiver.'
            )

        self.is_sysadmin = authz.is_sysadmin(c.user)

        if not self.is_sysadmin:
            # User does not have permissions on any publisher
            p.toolkit.abort(
                403,
                'Permission denied, only system administrators can run the archiver.'
            )

        tasks = []
        pkg_stat = {}

        if package_id:
            package_ids = [package_id]
        elif publisher_id:
            try:
                org = p.toolkit.get_action('organization_show')(
                    context, {
                        'id': publisher_id,
                        'include_datasets': False
                    })
                pkg_stat['group_dict'] = org
            except p.toolkit.ObjectNotFound:
                pkg_stat['status'] = "Error"
                pkg_stat['message'] = 'Could not find Publisher: {0}'.format(
                    publisher_id)
                return pkg_stat

            # Assuming max packages for publishers is 1000
            package_ids = p.toolkit.get_action('package_search')(
                context, {
                    "fq": "organization:{}".format(org["name"]),
                    'rows': 1000
                })["results"]

        else:
            try:
                package_ids = p.toolkit.get_action('package_list')(context, {})
            except p.toolkit.ObjectNotFound:
                pkg_stat['status'] = "Error"
                pkg_stat['message'] = 'Could not find Publisher: {0}'.format(
                    publisher_id)
                return pkg_stat

        for index, _pkg in enumerate(package_ids):
            if isinstance(_pkg, dict):
                pkg_id = _pkg['name']
            else:
                pkg_id = _pkg
            task = OrderedDict()

            job = jobs.enqueue(arch.run, [pkg_id, None, publisher_id])

            task[u'task_id'] = job.id
            task[u'name'] = pkg_id
            task[u'status'] = 'Queued'
            if publisher_id:
                task[u'title'] = _pkg[u'title']
            else:
                pkg = p.toolkit.get_action('package_show')(context, {
                    'id': pkg_id
                })
                task[u'title'] = pkg['title']
                pkg_stat['pkg'] = pkg
            tasks.append(json.dumps(task))

        pkg_stat['status'] = "success"
        pkg_stat['message'] = "All jobs are initiated successfully"
        pkg_stat['tasks'] = tasks
        if publisher_id:
            pkg_stat['id'] = publisher_id
        else:
            pkg_stat['id'] = id

        if publisher_id:
            pkg_stat['from_publisher'] = True

        return render('user/archiver_result.html', extra_vars=pkg_stat)
Example n. 30
def schedule_data_quality_check(package_id, force_recalculate=False):
    jobs.enqueue(calculate_metrics, [package_id, force_recalculate])
Example n. 31
    def upload(self):
        from pylons import config

        if not c.user:
            p.toolkit.abort(401, 'Permission denied, only publisher administrators can manage CSV files.')

        self.is_sysadmin = authz.is_sysadmin(c.user)

        # Orgs of which the logged user is admin
        context = {'model': model, 'user': c.user or c.author}
        self.authz_orgs = p.toolkit.get_action('organization_list_for_user')(context, {})

        if not self.is_sysadmin and not self.authz_orgs:
            # User does not have permissions on any publisher
            p.toolkit.abort(401, 'Permission denied, only publisher administrators can manage CSV files.')

        if p.toolkit.request.method == 'GET':
            return p.toolkit.render('csv/upload.html')
        elif p.toolkit.request.method == 'POST':
            csv_file = p.toolkit.request.POST['file']
            if not hasattr(csv_file, 'filename'):
                p.toolkit.abort(400, 'No CSV file provided')
            vars = {}
            vars['file_name'] = csv_file.filename
            fieldnames = [f[0] for f in CSV_MAPPING]
            warnings = {}
            errors = {}
            data = csv_file.file.read()
            data = StringIO.StringIO(data)
            try:
                reader = csv.reader(data)
                columns = next(reader)

                missing_columns = [f for f in fieldnames if f not in columns and f not in OPTIONAL_COLUMNS]
                surplus_columns = [f for f in columns if f not in fieldnames]

                if surplus_columns:
                    warnings = {'Ignoring extra columns': ', '.join(sorted(surplus_columns))}

                if missing_columns:
                    errors = {'Missing columns': ', '.join(sorted(missing_columns))}

                # A row with too few cells usually means a stray line break
                # split a record across lines.
                for entry in reader:
                    if len(entry) <= 12:
                        p.toolkit.abort(400, "Please make sure there are no line breaks in the file.")
                data.seek(0)
                reader = csv.reader(data)
                next(reader)

                publishers_in_csv = [row[0] for row in reader]
                all_publishers = p.toolkit.get_action('group_list')({}, {})

                unknown_publishers = [publisher
                    for publisher in publishers_in_csv
                    if publisher not in all_publishers
                ]

                if unknown_publishers:
                    unknown_publishers_string = ', '.join(unknown_publishers)
                    p.toolkit.abort(400, 'Unknown publisher(s): %s' % (unknown_publishers_string))
                data.seek(0)
                reader = csv.reader(data)
                next(reader)

                if not errors:
                    json_data = []
                    for row in reader:
                        d = OrderedDict()
                        for i, x in enumerate(row):
                            d[columns[i]] = x
                        json_data.append(d)
                    ckan_ini_filepath = os.path.abspath(config['__file__'])
                    if not json_data:
                        p.toolkit.abort(400, 'No data found in CSV file.')
                    job = jobs.enqueue(read_csv_file, [ckan_ini_filepath, json.dumps(json_data), c.user])
                    vars['task_id'] = job.id
                else:
                    p.toolkit.abort(400, ('Error in CSV file : {0}; {1}'.format
                                    (re.sub(r"[{}']+", "", str(warnings)),
                                        re.sub(r"[{}']+", "", str(errors)))))
            except Exception as e:
                vars['errors'] = errors
                vars['warnings'] = warnings
                p.toolkit.abort(400, ('Error opening CSV file: {0}'.format(e.message)))

            return p.toolkit.render('csv/result.html', extra_vars=vars)