Exemple #1
0
    def _create_task(self, resource):
        """Record a QA task status and enqueue a 'qa.update' celery job
        for the given resource, acting as the site user."""
        site_user = get_action('get_site_user')(
            {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})

        # Serialise worker credentials and the resource for the celery task.
        serialized_context = json.dumps({
            'site_url': self.site_url,
            'apikey': site_user.get('apikey')
        })
        serialized_resource = json.dumps(
            resource_dictize(resource, {'model': model}))

        celery_task_id = make_uuid()
        status = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'qa',
            'key': u'celery_task_id',
            'value': celery_task_id,
            'error': u'',
            'last_updated': datetime.now().isoformat(),
        }
        get_action('task_status_update')(
            {'model': model, 'user': site_user.get('name')}, status)
        celery.send_task("qa.update",
                         args=[serialized_context, serialized_resource],
                         task_id=celery_task_id)
    def _create_datastorer_task(self, resource):
        """Record a datastorer task status and enqueue a 'datastorer.upload'
        celery job for the given resource."""
        site_user = get_action('get_site_user')(
            {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
        # Lazily resolve the site URL when the plugin has not set it yet.
        if not hasattr(self, 'site_url'):
            from pylons import config
            self.site_url = (config.get('ckan.site_url_internally')
                             or config.get('ckan.site_url'))

        serialized_context = json.dumps({
            'site_url': self.site_url,
            'apikey': site_user.get('apikey'),
            'site_user_apikey': site_user.get('apikey'),
            'username': site_user.get('name'),
        })
        serialized_resource = json.dumps(
            resource_dictize(resource, {'model': model}))

        celery_task_id = make_uuid()
        status = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'datastorer',
            'key': u'celery_task_id',
            'value': celery_task_id,
            'last_updated': datetime.now().isoformat(),
        }
        get_action('task_status_update')(
            {'model': model, 'user': site_user.get('name')}, status)
        celery.send_task("datastorer.upload",
                         args=[serialized_context, serialized_resource],
                         task_id=celery_task_id)
Exemple #3
0
    def command(self):
        """Dispatch the CLI sub-command given as the first argument."""
        self.config.read('/etc/ckan/default/mobileminer.ini')

        ckan_url = self.config.get('settings', 'ckan_url').rstrip('/')
        api_key = self.config.get('settings', 'api_key')
        self.local = ckanapi.RemoteCKAN(ckan_url, apikey=api_key)

        action = self.args[0]

        # The two update actions go straight to celery; everything else is
        # handled by a method on this command object.
        if action == 'gsmupdate':
            celery.send_task("NAME.gsmupdate", task_id=str(uuid.uuid4()))
        elif action == 'dailyusageupdate':
            celery.send_task("NAME.dailyusageupdate", task_id=str(uuid.uuid4()))
        else:
            handlers = {
                'init': self.init,
                'minertables': self.minertables,
                'task': self.do_task,
                'push': self.push,
                'pushcells': self.push_cells,
                'gsmtest': self.gsm_test,
                'flush': self.flush,
            }
            if action in handlers:
                handlers[action]()
Exemple #4
0
    def upload_rdf(self):
        """Validate an RDF upload request and queue it for ingestion.

        Expects ``package_id``, ``data`` and ``format`` request parameters;
        aborts with 400 when one is missing/unsupported, 404 when the
        package has no SPARQL endpoint, and delegates authorization to
        ``package_update``.
        """
        request = self._get_request_data()

        # Idiom fix: use `x not in d` rather than `not x in d`.
        if 'package_id' not in request:
            abort(400, 'Please provide a suitable package_id parameter')
        elif not check_access('package_update', {'id': request['package_id']}):
            return self._finish_not_authz()

        if 'data' not in request:
            abort(400, 'Please provide a suitable data parameter')

        if 'format' not in request or request['format'] not in SUPPORTED_RDF_SYNTAXES:
            abort(400, 'Please provide a suitable format parameter')

        # The target package must belong to a configured SPARQL endpoint.
        endpoint = model.Session.query(SparqlEndpoint).filter(
            SparqlEndpoint.packages.any(
                Package.name == request['package_id'])).first()
        if not endpoint:
            abort(404, 'No endpoint defined for provided package')

        pkg_data = {
            'id': request['package_id'],
            'sparulurl': endpoint.sparulurl,
            'storetype': endpoint.storetype,
            'graph': endpoint.graph,
            'username': endpoint.username,
            'passwd': endpoint.passwd,
            'isauthrequired': endpoint.isauthrequired,
        }

        celery.send_task('upload_rdf',
                         args=[pkg_data, request['data'], request['format']],
                         task_id=str(uuid.uuid4()))

        return self._finish_ok('Uploading... Check progress in package web.')
Exemple #5
0
def create_qa_update_package_task(package, queue):
    """Put a 'qa.update_package' job for *package* onto the given celery queue."""
    from pylons import config
    ini_path = os.path.abspath(config.__file__)
    job_id = '%s-%s' % (package.name, make_uuid()[:4])
    celery.send_task('qa.update_package',
                     args=[ini_path, package.id],
                     task_id=job_id,
                     queue=queue)
    log.debug('QA of package put into celery queue %s: %s', queue, package.name)
    def _create_datastorer_task(self, resource):
        """Submit a 'datastorer.upload' celery task for *resource* and
        register its task status as the site user."""
        site_user = get_action('get_site_user')(
            {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})

        # Serialise the credentials and the resource for the worker.
        task_context_json = json.dumps({
            'site_url': h.url_for_static('/', qualified=True),
            'apikey': site_user.get('apikey'),
            'site_user_apikey': site_user.get('apikey'),
            'username': site_user.get('name'),
        })
        resource_json = json.dumps(resource_dictize(resource, {'model': model}))

        celery_task_id = make_uuid()
        get_action('task_status_update')(
            {'model': model, 'user': site_user.get('name')},
            {
                'entity_id': resource.id,
                'entity_type': u'resource',
                'task_type': u'datastorer',
                'key': u'celery_task_id',
                'value': celery_task_id,
                'last_updated': datetime.now().isoformat(),
            })
        celery.send_task("datastorer.upload",
                         args=[task_context_json, resource_json],
                         task_id=celery_task_id)
Exemple #7
0
    def _create_task(self, resource):
        """Record a QA task status and enqueue 'qa.update' for *resource*."""
        site_user = get_action('get_site_user')(
            {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
        qa_task_id = make_uuid()

        get_action('task_status_update')(
            {'model': model, 'user': site_user.get('name')},
            {
                'entity_id': resource.id,
                'entity_type': u'resource',
                'task_type': u'qa',
                'key': u'celery_task_id',
                'value': qa_task_id,
                'error': u'',
                'last_updated': datetime.now().isoformat(),
            })

        # The worker receives the site credentials and the dictized resource.
        celery.send_task(
            "qa.update",
            args=[
                json.dumps({'site_url': self.site_url,
                            'apikey': site_user.get('apikey')}),
                json.dumps(resource_dictize(resource, {'model': model})),
            ],
            task_id=qa_task_id)
Exemple #8
0
    def _create_archiver_task(self, resource):
        """Record an archiver task status and enqueue 'archiver.update'.

        Uses the current request user's credentials when available and
        falls back to the site user's otherwise (e.g. when triggered from
        the command line, where the `c` stacked object proxy is not
        registered and attribute access on it raises TypeError).
        """
        from ckan.lib.base import c
        site_user = get_action('get_site_user')(
            {'model': model, 'ignore_auth': True, 'defer_commit': True}, {}
        )

        # ROBUSTNESS FIX: outside a web request the paste registry has no
        # object registered for this thread, so `c.user` raises TypeError
        # and the old code crashed.  Fall back to the site user, matching
        # ckanext-archiver's command-line behaviour.
        try:
            user = model.User.by_name(c.user)
            username = user.name
            userapikey = user.apikey
        except TypeError:
            username = site_user['name']
            userapikey = site_user['apikey']

        context = json.dumps({
            'site_url': self.site_url,
            'apikey': userapikey,
            'username': username,
            'cache_url_root': self.cache_url_root,
            'site_user_apikey': site_user['apikey']
        })
        data = json.dumps(resource_dictize(resource, {'model': model}))

        task_id = make_uuid()
        archiver_task_status = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'archiver',
            'key': u'celery_task_id',
            'value': task_id,
            'error': u'',
            'last_updated': datetime.now().isoformat()
        }
        # Status updates run as the site user with auth checks skipped.
        archiver_task_context = {
            'model': model,
            'user': site_user['name'],
            'ignore_auth': True
        }

        get_action('task_status_update')(archiver_task_context, archiver_task_status)
        celery.send_task("archiver.update", args=[context, data], task_id=task_id)
Exemple #9
0
def syndicate_dataset(package_id, topic):
    """Queue a 'syndicate.sync_package' celery task for the given dataset."""
    ini_path = os.path.abspath(config['__file__'])
    job_id = '{}-{}'.format(str(uuid.uuid4()), package_id)
    celery.send_task('syndicate.sync_package',
                     args=[package_id, topic, ini_path],
                     task_id=job_id)
Exemple #10
0
def enqueue_document(user, filename, publisher):
    """
    Send a spreadsheet-processing job to the priority celery queue and
    immediately record a 'Started' task status so the user can see that
    something is happening.

    Returns the task status id and its last_updated timestamp.
    """
    from pylons import config
    from ckan import model
    from ckan.model.types import make_uuid
    from ckan.lib.celery_app import celery

    site_user = get_action('get_site_user')({
        'model': model,
        'ignore_auth': True,
        'defer_commit': True
    }, {})

    job_id = make_uuid()

    # Payload for the worker: acting user plus call-back details.
    worker_context = json.dumps({
        'username': user.name,
        'site_url': config.get('ckan.site_url_internally') or config.get('ckan.site_url'),
        'apikey': user.apikey,
        'site_user_apikey': site_user['apikey']
    })
    worker_data = json.dumps({
        'file': filename,
        'publisher': publisher.name,
        'publisher_id': publisher.id,
        'jobid': job_id
    })
    celery.send_task("inventory.process",
                     args=[worker_context, worker_data],
                     task_id=job_id,
                     queue='priority')

    # Mark the task as started so the UI can report progress right away.
    status = {
        'entity_id': job_id,
        'entity_type': u'inventory',
        'task_type': u'inventory',
        'key': u'celery_task_id',
        'value': job_id,
        'error': u'',
        'state': 'Started',
        'last_updated': datetime.datetime.now().isoformat()
    }
    res = get_action('task_status_update')(
        {'model': model, 'user': user.name, 'ignore_auth': True},
        status)
    return res['id'], status['last_updated']
def identify_resource(resource):
    """Start vectorstorer identification for *resource* and track it
    with a ResourceIngest row."""
    # resource_dictize gives the correct resource url even when the
    # dataset is still in draft state.
    identify_task_id = make_uuid()
    celery.send_task('vectorstorer.identify',
                     args=[resource_dictize(resource, {'model': model}),
                           _make_default_context()],
                     countdown=15,
                     task_id=identify_task_id)

    existing = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    tracker = ResourceIngest(identify_task_id, resource.id,
                             ResourceStorerType.VECTOR)
    if existing:
        # The user previously rejected the ingestion workflow but now wants
        # to re-identify: replace the stale row and commit straight away.
        model.Session.delete(existing)
        model.Session.add(tracker)
        model.Session.commit()
    else:
        # A newly created/updated resource just gets a tracking row added.
        model.Session.add(tracker)
Exemple #12
0
def create_identify_resource_task(resource):
    """
    Creates the celery task to identify the resource
    :param resource: the resource to be identified
    """
    task_id = make_uuid()

    # resource_dictize() is used only to force CKAN into producing an
    # absolute url; a cleaner alternative might be url_for(
    # controller='package', action='resource_download', ...).
    package_id = resource.as_dict()['package_id']
    resource_dict = resource_dictize(resource, {'model': model})
    resource_dict['package_id'] = package_id

    ctx = _make_default_context()
    ctx['resource_dict'] = resource_dict
    celery.send_task('rasterstorer.identify', args=[ctx], task_id=task_id)

    previous = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    record = ResourceIngest(task_id, resource.id, ResourceStorerType.RASTER)
    if previous is not None:
        # Re-identification after the user rejected the ingestion workflow:
        # drop the old row and commit immediately.
        model.Session.delete(previous)
        model.Session.add(record)
        model.Session.commit()
    else:
        # A newly created/updated resource just gets a tracking row added.
        model.Session.add(record)
Exemple #13
0
    def command(self):
        """
        Parse command line arguments and call appropriate method.

        'update' fetches all packages via the API and queues a
        'datastorer.upload' celery task for every resource that is not yet
        webstored and has a supported format.
        """
        if not self.args or self.args[0] in ["--help", "-h", "help"]:
            # print(...) works identically on Python 2 and 3 for one argument.
            print(Webstorer.__doc__)
            return

        cmd = self.args[0]
        self._load_config()
        # import after load config so CKAN_CONFIG environment variable can be set
        from ckan.lib.celery_app import celery
        import tasks

        user = get_action("get_site_user")({"model": model, "ignore_auth": True}, {})
        context = json.dumps(
            {
                "site_url": config["ckan.site_url"],
                "apikey": user.get("apikey"),
                "username": user.get("name"),
                "webstore_url": config.get("ckan.webstore_url"),
            }
        )
        api_url = urlparse.urljoin(config["ckan.site_url"], "api/action")

        if cmd == "update":
            response = requests.post(api_url + "/current_package_list_with_resources", "{}")
            packages = json.loads(response.content).get("result")

            for package in packages:
                for resource in package.get("resources", []):
                    if resource["webstore_url"]:
                        continue
                    mimetype = resource["mimetype"]
                    if mimetype and (
                        mimetype not in tasks.DATA_FORMATS or resource["format"] not in tasks.DATA_FORMATS
                    ):
                        continue

                    # BUG FIX: json.dumps()'s second positional parameter is
                    # `skipkeys`, so the old call json.dumps(resource,
                    # {"model": model}) silently passed the dict as skipkeys
                    # instead of dictizing. The resource from the API is
                    # already a plain dict, so serialize it directly. Also
                    # hoisted below the skip checks to avoid wasted work.
                    data = json.dumps(resource)

                    logger.info(
                        "Webstoring resource from resource %s from package %s" % (resource["url"], package["name"])
                    )

                    task_id = make_uuid()
                    datastorer_task_status = {
                        "entity_id": resource["id"],
                        "entity_type": u"resource",
                        "task_type": u"datastorer",
                        "key": u"celery_task_id",
                        "value": task_id,
                        "last_updated": datetime.now().isoformat(),
                    }
                    datastorer_task_context = {"model": model, "user": user.get("name")}

                    get_action("task_status_update")(datastorer_task_context, datastorer_task_status)
                    celery.send_task("datastorer.upload", args=[context, data], task_id=task_id)
        else:
            logger.error("Command %s not recognized" % (cmd,))
Exemple #14
0
    def _create_archiver_task(self, resource):
        """Record an archiver task status and enqueue 'archiver.update'
        for *resource*, using request-user credentials when available."""
        from ckan.lib.base import c
        site_user = get_action('get_site_user')({
            'model': model,
            'ignore_auth': True,
            'defer_commit': True
        }, {})
        # When triggered from the command line the paste registry has not
        # registered the `c` stacked object proxy for this thread, so any
        # attribute access on it raises TypeError.  In that case fall back
        # to the site user (same as ckanext-archiver's own CLI, see
        # https://github.com/okfn/ckanext-archiver/blob/master/ckanext/archiver/commands.py);
        # a config option to choose the behaviour would be nicer.
        try:
            c.user
        except TypeError:
            username = site_user['name']
            userapikey = site_user['apikey']
        else:
            request_user = model.User.by_name(c.user)
            username = request_user.name
            userapikey = request_user.apikey

        res_dict = resource_dictize(resource, {'model': model})
        payload = json.dumps(res_dict)
        worker_context = json.dumps({
            'site_url': self.site_url,
            'apikey': userapikey,
            'username': username,
            'cache_url_root': self.cache_url_root,
            'site_user_apikey': site_user['apikey']
        })

        task_id = make_uuid()
        get_action('task_status_update')(
            {'model': model, 'user': site_user['name'], 'ignore_auth': True},
            {
                'entity_id': resource.id,
                'entity_type': u'resource',
                'task_type': u'archiver',
                'key': u'celery_task_id',
                'value': task_id,
                'error': u'',
                'last_updated': datetime.now().isoformat(),
            })
        celery.send_task("archiver.update",
                         args=[worker_context, payload],
                         task_id=task_id)
        log.debug(
            'Archival of resource put into celery queue: %s url=%r user=%s site_user=%s site_url=%s',
            resource.id, res_dict.get('url'), username, site_user['name'],
            self.site_url)
Exemple #15
0
def create_qa_update_package_task(package, queue):
    """Enqueue a package-level QA update on the given celery queue."""
    from pylons import config
    ini_path = os.path.abspath(config.__file__)
    celery.send_task('qa.update_package',
                     args=[ini_path, package.id],
                     task_id='%s-%s' % (package.name, make_uuid()[:4]),
                     queue=queue)
    log.debug('QA of package put into celery queue %s: %s',
              queue, package.name)
def create_archiver_task(resource, queue):
    """Enqueue an 'archiver.update' job for *resource* on *queue*."""
    from pylons import config
    pkg = resource.resource_group.package
    job_id = '%s/%s/%s' % (pkg.name, resource.id[:4], make_uuid()[:4])
    ini_path = os.path.abspath(config.__file__)
    celery.send_task('archiver.update',
                     args=[ini_path, resource.id, queue],
                     task_id=job_id,
                     queue=queue)
    log.debug('Archival of resource put into celery queue %s: %s/%s url=%r', queue, pkg.name, resource.id, resource.url)
 def after_update(self, context, data):
     # Called after a dataset update; when the certificates integration is
     # configured, schedule a 'certificate.new' celery task for the dataset.
     # The final True argument is passed through to the task as-is
     # (presumably an "existing dataset" flag -- confirm against the worker).
     if self._has_certs_config():
         log.debug("Scheduling new certificate task for existing '%s' dataset", data['name'])
         celery.send_task(
             'certificate.new',
             args=[self._get_task_context(context), self._get_package_data(data), True],
             task_id=make_uuid()
         )
Exemple #18
0
def create_archiver_package_task(package, queue):
    """Enqueue an 'archiver.update_package' job for *package* on *queue*."""
    from pylons import config
    ini_path = os.path.abspath(config['__file__'])
    job_id = '%s/%s' % (package.name, make_uuid()[:4])
    celery.send_task('archiver.update_package',
                     args=[ini_path, package.id, queue],
                     task_id=job_id,
                     queue=queue)
    log.debug('Archival of package put into celery queue %s: %s',
              queue, package.name)
Exemple #19
0
def create_delete_resource_task(resource):
    """
    Creates the celery task for raster resource deletion
    :param resource: the resource to be deleted
    """
    delete_context = _make_default_context()
    delete_context['resource_dict'] = resource
    celery.send_task('rasterstorer.delete',
                     args=[delete_context],
                     task_id=make_uuid())
Exemple #20
0
def launch_transformations():
    """Run every transformation whose crontab matches the current time."""
    log.info('Checking transformation crontabs')
    for transformation in session.query(Transformation).all():
        if not must_run(transformation.minute, transformation.hour,
                        transformation.day_of_week):
            continue
        mainclass, _ = get_config_data(transformation.output_dir)
        celery.send_task("extractor.perform_extraction",
                         args=[transformation.package_id, mainclass],
                         task_id=str(uuid.uuid4()))
def syncronize_org(id):
    """Queue a datadotworld sync task for every dataset owned by org *id*."""
    ini_path = os.path.abspath(config['__file__'])
    org_packages = model.Session.query(model.Package).filter_by(owner_org=id)
    logger.info('Starting update task for [{0}] datasets'.format(
        org_packages.count()))
    for package in org_packages:
        celery.send_task('datadotworld.syncronize',
                         args=[package.id, ini_path])
Exemple #22
0
def create_ingest_resource_task(resource):
    """
    Creates the celery task for raster resource ingestion
    :param resource: the resource to be ingested
    """
    ingest_context = _make_default_context()
    ingest_context['resource_dict'] = resource.as_dict()
    celery.send_task('rasterstorer.import',
                     args=[ingest_context],
                     task_id=make_uuid())
Exemple #23
0
def create_qa_update_task(resource, queue):
    """Enqueue a 'qa.update' job for *resource* on the given celery queue."""
    from pylons import config
    pkg = resource.resource_group.package
    job_id = '%s/%s/%s' % (pkg.name, resource.id[:4], make_uuid()[:4])
    ini_path = os.path.abspath(config.__file__)
    celery.send_task('qa.update',
                     args=[ini_path, resource.id],
                     task_id=job_id,
                     queue=queue)
    log.debug('QA of resource put into celery queue %s: %s/%s url=%r', queue,
              pkg.name, resource.id, resource.url)
    def receive_data(self, operation, queue, **params):
        """Handle notifications; zip the package when it has been archived."""
        from ckan.lib.celery_app import celery
        if operation != 'package-archived':
            return
        package_id = params.get('package_id')
        ini_path = os.path.abspath(config['__file__'])
        celery.send_task('packagezip.create_zip',
                         args=[ini_path, package_id, queue],
                         task_id=str(uuid.uuid4()),
                         queue=queue)
        log.debug('Package zip of package put into celery queue %s: %s', queue, package_id)
    def _push_failed(self):
        """Re-queue a sync task for every record stuck in the failed state."""
        # XXX: DO NOT IMPORT celery at the top of file - it will
        # use incorrect config then and you'll receive error like
        # "no section app:main in config file"
        from ckan.lib.celery_app import celery
        ini_path = path.abspath(config['__file__'])
        for failed_record in model.Session.query(Extras).filter_by(state='failed'):
            celery.send_task('datadotworld.syncronize',
                             args=[failed_record.package_id, ini_path])
def create_package_task(package, queue):
    """Enqueue Gemini post-processing of *package* on the given queue."""
    from pylons import config
    from ckan.model.types import make_uuid
    log = __import__('logging').getLogger(__name__)
    ini_path = os.path.abspath(config['__file__'])
    job_id = '%s/%s' % (package.name, make_uuid()[:4])
    celery.send_task('gemini_postprocess.process_package',
                     args=[ini_path, package.id, queue],
                     task_id=job_id,
                     queue=queue)
    log.debug('Gemini PostProcess of package put into celery queue %s: %s',
              queue, package.name)
def compat_enqueue(name, fn, args=None):
    u'''
    Enqueue a background job using Celery or RQ.
    '''
    try:
        # Prefer RQ when its enqueue helper can be imported.
        from ckan.lib.jobs import enqueue
    except ImportError:
        # Fallback to Celery
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args)
    else:
        enqueue(fn, args=args)
Exemple #28
0
def create_qa_update_task(resource, queue):
    """Enqueue a 'qa.update' job for *resource*, handling old CKAN layouts."""
    from pylons import config
    # CKAN <= 2.2.x still had the ResourceGroup layer between resource
    # and package.
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        owning_package = resource.resource_group.package
    else:
        owning_package = resource.package
    ini_path = os.path.abspath(config.__file__)
    job_id = '%s/%s/%s' % (owning_package.name, resource.id[:4],
                           make_uuid()[:4])
    celery.send_task('qa.update', args=[ini_path, resource.id],
                     task_id=job_id, queue=queue)
    log.debug('QA of resource put into celery queue %s: %s/%s url=%r',
              queue, owning_package.name, resource.id, resource.url)
Exemple #29
0
def create_package_task(package, queue):
    """Put a gemini_postprocess job for *package* onto *queue*."""
    from pylons import config
    from ckan.model.types import make_uuid
    logger = __import__('logging').getLogger(__name__)
    celery.send_task('gemini_postprocess.process_package',
                     args=[os.path.abspath(config['__file__']),
                           package.id, queue],
                     task_id='%s/%s' % (package.name, make_uuid()[:4]),
                     queue=queue)
    logger.debug('Gemini PostProcess of package put into celery queue %s: %s',
                 queue, package.name)
Exemple #30
0
def compat_enqueue(name, fn, args=None):
    u'''
    Enqueue a background job using Celery or RQ.
    '''
    try:
        # Prefer RQ when the toolkit's enqueue_job can be imported.
        from ckan.plugins.toolkit import enqueue_job
    except ImportError:
        # Fallback to Celery
        import uuid
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args, task_id=str(uuid.uuid4()))
    else:
        enqueue_job(fn, args=args)
def create_delete_resource_task(resource):
    """
    Creates the celery task for raster resource deletion
    :param resource: the resource to be deleted
    """
    job_id = make_uuid()
    ctx = _make_default_context()
    ctx['resource_dict'] = resource
    celery.send_task('rasterstorer.delete', args=[ctx], task_id=job_id)
    def deploy_transformation(self, transformation):
        """Deploy a transformation and reset its extraction history."""
        mainclass, required = get_config_data(transformation.output_dir)
        get_instance(transformation.output_dir, mainclass).deploy()

        # Install the declared dependencies asynchronously via celery.
        celery.send_task("extractor.install_dependencies",
                         args=[required], task_id=str(uuid.uuid4()))

        # Clear the extraction log for the freshly deployed transformation.
        transformation.extractions = []
        model.Session.merge(transformation)
        model.Session.commit()
    def receive_data(self, operation, queue, **params):
        """On 'package-archived' notifications, queue a package-zip build."""
        from ckan.lib.celery_app import celery
        if operation == 'package-archived':
            pkg_id = params.get('package_id')
            celery.send_task('packagezip.create_zip',
                             args=[os.path.abspath(config['__file__']),
                                   pkg_id, queue],
                             task_id=str(uuid.uuid4()),
                             queue=queue)
            log.debug('Package zip of package put into celery queue %s: %s',
                      queue, pkg_id)
Exemple #34
0
    def _create_archiver_task(self, resource):
        """Create a task status record and queue an 'archiver.update' celery
        task for the given resource.

        Credentials sent to the worker are the current request user's when
        one is available, otherwise the site user's (e.g. for CLI runs).
        """
        from ckan.lib.base import c
        site_user = get_action('get_site_user')(
            {'model': model, 'ignore_auth': True, 'defer_commit': True}, {}
        )
        # If the code that triggers this is run from the command line, the c
        # stacked object proxy variable will not have been set up by the paste
        # registry, so it will give an error saying no object has been
        # registered for this thread. The easiest thing to do is to catch
        # this, but it would be nice to have a config option so that the
        # behaviour can be specified.
        try:
            c.user
        except TypeError:
            # This is no different from running the archiver from the command line:
            # See https://github.com/okfn/ckanext-archiver/blob/master/ckanext/archiver/commands.py
            username = site_user['name']
            userapikey = site_user['apikey']
        else:
            user = model.User.by_name(c.user)
            username = user.name
            userapikey = user.apikey
        # JSON payload of credentials/URLs handed to the celery worker.
        context = json.dumps({
            'site_url': self.site_url,
            'apikey': userapikey,
            'username': username,
            'cache_url_root': self.cache_url_root,
            'site_user_apikey': site_user['apikey']
        })
        res_dict = resource_dictize(resource, {'model': model})
        data = json.dumps(res_dict)

        task_id = make_uuid()
        archiver_task_status = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'archiver',
            'key': u'celery_task_id',
            'value': task_id,
            'error': u'',
            'last_updated': datetime.now().isoformat()
        }
        # The status update is made as the site user with auth checks skipped.
        archiver_task_context = {
            'model': model,
            'user': site_user['name'],
            'ignore_auth': True
        }

        get_action('task_status_update')(archiver_task_context, archiver_task_status)
        celery.send_task("archiver.update", args=[context, data], task_id=task_id)
        log.debug('Archival of resource put into celery queue: %s url=%r user=%s site_user=%s site_url=%s', resource.id, res_dict.get('url'), username, site_user['name'], self.site_url)
def create_ingest_resource_task(resource):
    """
    Creates the celery task for raster resource ingestion
    :param resource: the resource to be ingested
    """
    job_id = make_uuid()
    ctx = _make_default_context()
    ctx['resource_dict'] = resource.as_dict()
    celery.send_task('rasterstorer.import', args=[ctx], task_id=job_id)
Exemple #36
0
def create_archiver_resource_task(resource, queue):
    """Enqueue an 'archiver.update_resource' job for *resource* on *queue*."""
    from pylons import config
    # CKAN <= 2.2.x still had the ResourceGroup layer between resource
    # and package.
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        owning_package = resource.resource_group.package
    else:
        owning_package = resource.package
    job_id = '%s/%s/%s' % (owning_package.name, resource.id[:4],
                           make_uuid()[:4])
    celery.send_task('archiver.update_resource',
                     args=[os.path.abspath(config['__file__']),
                           resource.id, queue],
                     task_id=job_id, queue=queue)
    log.debug('Archival of resource put into celery queue %s: %s/%s url=%r',
              queue, owning_package.name, resource.id, resource.url)
Exemple #37
0
def compat_enqueue(name, fn, queue, args=None, kwargs=None):
    u'''
    Enqueue a background job using Celery or RQ.

    :param name: celery task name (also used to build the RQ job title).
    :param fn: job function, used by the RQ path.
    :param queue: name of the queue to put the job on.
    :param args: positional arguments for the job (default: none).
    :param kwargs: keyword arguments for the job (default: none).
    '''
    # BUG FIX: the previous signature used mutable defaults
    # (args=[], kwargs={}), which are shared across calls and can leak
    # state between invocations. Normalise None to fresh containers.
    args = [] if args is None else args
    kwargs = {} if kwargs is None else kwargs
    try:
        # Try to use RQ
        from ckan.plugins.toolkit import enqueue_job
        nice_name = name + " " + args[1] if (len(args) >= 2) else name
        enqueue_job(fn, args=args, kwargs=kwargs, queue=queue, title=nice_name)
    except ImportError:
        # Fallback to Celery
        import uuid
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args + [queue], task_id=six.text_type(uuid.uuid4()))
Exemple #38
0
def compat_enqueue(name, fn, queue, args=None):

    u'''
    Enqueue a background job using Celery or RQ.

    :param name: celery task name
    :param fn: function to run when RQ is available
    :param queue: name of the queue to put the job on
    :param args: positional arguments for the job (defaults to [])
    '''
    # Fix: with the default args=None, the Celery fallback below evaluated
    # ``None + [queue]`` and raised TypeError — normalise to a list first.
    if args is None:
        args = []
    try:
        # Try to use RQ
        from ckan.plugins.toolkit import enqueue_job
        enqueue_job(fn, args=args, queue=queue)
    except ImportError:
        # Fallback to Celery
        import uuid
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args + [queue], task_id=str(uuid.uuid4()))
Exemple #39
0
def enqueue_document(user, filename, publisher):
    """
    Send a job to the priority celery queue so the provided spreadsheet is
    processed, then record a 'Started' task status so the user immediately
    sees that something is happening.

    Returns a (task_status_id, last_updated) tuple.
    """
    from pylons import config
    from ckan import model
    from ckan.model.types import make_uuid
    from ckan.lib.celery_app import celery

    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})

    job_id = make_uuid()

    # Credentials and endpoints the worker needs to talk back to CKAN.
    context = json.dumps({
        'username': user.name,
        'site_url': config.get('ckan.site_url_internally') or config.get('ckan.site_url'),
        'apikey': user.apikey,
        'site_user_apikey': site_user['apikey']
    })
    # Payload describing the file to process.
    data = json.dumps({
        'file': filename,
        'publisher': publisher.name,
        'publisher_id': publisher.id,
        'jobid': job_id
    })
    celery.send_task("inventory.process", args=[context, data], task_id=job_id, queue='priority')

    # Create a task status and mark it started so progress is visible.
    status = {
        'entity_id': job_id,
        'entity_type': u'inventory',
        'task_type': u'inventory',
        'key': u'celery_task_id',
        'value': job_id,
        'error': u'',
        'state': 'Started',
        'last_updated': datetime.datetime.now().isoformat()
    }
    status_context = {
        'model': model,
        'user': user.name,
        'ignore_auth': True
    }
    result = get_action('task_status_update')(status_context, status)
    return result['id'], status['last_updated']
def update_vector_storer_task(resource):
    """Queue a 'vectorstorer.update' celery job for an updated resource."""
    site_user = _get_site_user()
    res_dict = resource.as_dict()
    # The worker needs CKAN credentials plus the list of derived child
    # resources that must be replaced along with the parent.
    context = json.dumps({
        'resource_list_to_delete': _get_child_resources(res_dict),
        'package_id': res_dict['package_id'],
        'site_url': _get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'user': site_user.get('name'),
        'db_params': config['ckan.datastore.write_url'],
    })
    data = json.dumps(resource_dictize(resource, {'model': model}))
    celery.send_task('vectorstorer.update',
                     args=[_get_geoserver_context(), context, data],
                     task_id=make_uuid())
def identify_resource(resource_obj):
    """Queue a 'vectorstorer.identify_resource' job and record it locally."""
    site_user_apikey = _get_site_user()['apikey']
    # resource_dictize() yields the correct resource url even when the
    # dataset is still in draft state, so graft it onto the plain dict.
    res_dict = resource_dictize(resource_obj, {'model': model})
    resource = resource_obj.as_dict()
    resource['url'] = res_dict['url']

    task_id = make_uuid()
    celery.send_task('vectorstorer.identify_resource',
                     args=[json.dumps(resource), site_user_apikey],
                     task_id=task_id)

    # Track the identification task in the local table.
    ckan.model.Session.add(ResourceIdentify(task_id, resource['id']))
    def notify(self, entity, operation=None):
        """Domain-object hook: queue geopusher processing for changed resources."""
        if not isinstance(entity, model.Resource):
            return
        # A NEW event is always followed by a CHANGED event, so reacting
        # only to 'changed' covers both created and updated resources.
        if operation != DomainObjectOperation.changed:
            return
        # Hand the resource off to the geopusher worker, which checks
        # whether it is a shape file and processes it if so.
        site_url = config.get('ckan.site_url', 'http://localhost/')
        apikey = model.User.get('default').apikey
        celery.send_task(
            'geopusher.process_resource',
            args=[entity.id, site_url, apikey],
            task_id='{}-{}'.format(str(uuid.uuid4()), operation))
Exemple #43
0
    def notify(self, entity, operation=None):
        """Queue 'geopusher.process_resource' when a resource changes."""
        is_changed_resource = (isinstance(entity, model.Resource)
                               and operation == DomainObjectOperation.changed)
        if is_changed_resource:
            # The 'changed' event also follows creation, so this covers
            # both new and updated resources; the worker decides whether
            # the resource is a shape file worth processing.
            celery.send_task('geopusher.process_resource',
                             args=[entity.id,
                                   config.get('ckan.site_url', 'http://localhost/'),
                                   model.User.get('default').apikey],
                             task_id='{}-{}'.format(str(uuid.uuid4()),
                                                    operation))
def update_ingest_resource(resource):
    """Queue a 'vectorstorer.update' job for a changed vector resource."""
    res_dict = resource.as_dict()
    # Besides the defaults, the worker needs the derived child resources
    # to delete and the datastore connection parameters.
    context = _make_default_context()
    context['resource_list_to_delete'] = _get_child_resources(res_dict)
    context['package_id'] = res_dict['package_id']
    context['db_params'] = config['ckan.datastore.write_url']
    celery.send_task('vectorstorer.update',
                     args=[resource_dictize(resource, {'model': model}),
                           context,
                           _make_geoserver_context()],
                     task_id=make_uuid())
Exemple #45
0
    def command(self):
        self._load_config()

        task_id = self.args[0]
        task_name = self.args[1]
        task_frequency = self.args[2].upper()
        task_data = self.args[3]

        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True
        }
        get_action('ytp_tasks_add')(context, {
            'id': task_id,
            'task': task_name,
            'frequency': task_frequency.upper(),
            'data': task_data
        })

        if self.options.wait or self.options.execute:
            import uuid
            from ckan.lib.celery_app import celery

            if self.verbose:
                print u"Executing %s" % task_name
            result = celery.send_task(task_name,
                                      args=(task_data, ),
                                      task_id=str(uuid.uuid4()))
            if self.options.wait:
                result.get(timeout=int(self.options.wait))
    def launch_transformation(self, id):
        """Controller action: trigger asynchronous extraction for a package.

        :param id: package name whose transformation should be launched
        :returns: rendered 'extractor/read.html' template
        """
        log.info('Launching transformation for package name: %s' % id)

        # using default functionality
        # (self.read() populates the pylons template context ``c.pkg``)
        self.read(id)

        # Fetch the transformation record configured for this package.
        t = model.Session.query(Transformation).filter_by(package_id=c.pkg.id).first()
        # NOTE(review): if no Transformation row exists, ``t`` is None and
        # the next line raises AttributeError — confirm the workflow
        # guarantees one.
        mainclass, _ = get_config_data(t.output_dir)
        celery.send_task("extractor.perform_extraction",
            args=[t.package_id, mainclass], task_id=str(uuid.uuid4()))

        # Refresh template data and clear the error flag for the view.
        self.get_transformation_data(c.pkg.id, c)
        c.error = False

        #rendering using template
        return render('extractor/read.html')
Exemple #47
0
    def delete_me(self):
        """Controller action: let the logged-in user delete their own account.

        Renders a confirmation page unless the request carries
        ``delete=true``; the actual deletion is delegated to the
        'ckanext.ytp_drupal.delete_user' celery task.
        """
        try:
            if not c.userobj:
                raise NotAuthorized

            # First visit shows the confirmation page; only proceed once
            # the user confirmed with ?delete=true.
            if request.params.get('delete', None) != 'true':
                return render('user/delete_me.html')

            from ckan.lib.celery_app import celery
            context = {'model': model, 'session': model.Session, 'user': c.user}
            toolkit.check_access('user_delete_me', context, {})

            celery.send_task("ckanext.ytp_drupal.delete_user", args=(c.userobj.id,), task_id=str(uuid.uuid4()))
            redirect(get_plugin('ytp_drupal').cancel_url)
        except NotAuthorized:
            msg = _('Unauthorized to delete user')
            # NOTE(review): the message has no {user_id} placeholder and
            # ``id`` here is the builtin, so .format() is a no-op — verify
            # the intended message.
            abort(401, msg.format(user_id=id))
Exemple #48
0
 def _create_torrent_task(self, resource):
     """Queue a 'torrent.create' celery job for an uploaded resource.

     Returns None without queueing anything unless
     ``ckan.torrent_tracker_url`` is configured. Torrent and payload
     output paths fall back to subdirectories of ``ckan.storage_path``
     when not configured explicitly.
     """
     log.info("resource attrs: %s", dir(resource))
     log.info("resource id: %s", resource.id)
     tracker_url = config.get('ckan.torrent_tracker_url', '')
     if not tracker_url:
         return None
     # Resource files live under <ckan.storage_path>/resources.
     storage_path = config.get('ckan.storage_path','')
     storage_path = os.path.join(storage_path, 'resources')
     resource_path = get_path(storage_path, resource.id)
     log.info('resource path %s', resource_path)
     torrent_storage_path = config.get('ckan.torrent_storage_path','')
     payload_storage_path = config.get('ckan.torrent_payload_storage_path', '')
     # Default output locations when not explicitly configured.
     if not torrent_storage_path:
         torrent_storage_path = os.path.join(storage_path,'torrents')
     if not payload_storage_path:
         payload_storage_path = os.path.join(storage_path,'payload')
     # countdown=10 delays the worker pickup — presumably to let the
     # uploaded file land on disk first; confirm against the worker.
     celery.send_task("torrent.create", args=[resource_path, torrent_storage_path, payload_storage_path,resource.id, tracker_url], countdown=10, task_id=str(uuid.uuid4()))    
def create_vector_storer_task(resource, extra_params = None):
    """Queue a 'vectorstorer.upload' job for a newly added vector resource.

    :param resource: the resource model object to publish
    :param extra_params: optional dict merged into the worker context
    """
    site_user = _get_site_user()
    # Context the worker needs to write back into CKAN and the datastore.
    cont = {
        'package_id': resource.as_dict()['package_id'],
        'site_url': _get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'user': site_user.get('name'),
        'db_params': config['ckan.datastore.write_url'],
    }
    if extra_params:
        cont.update(extra_params)

    celery.send_task('vectorstorer.upload',
                     args=[_get_geoserver_context(),
                           json.dumps(cont),
                           json.dumps(resource_dictize(resource, {'model': model}))],
                     task_id=make_uuid())
def create_identify_resource_task(resource):
    """
    Creates the celery task to identify the resource
    :param resource: the resource to be identified
    """
    task_id = make_uuid()

    # We are using resource_dictize() just to force CKAN to provide an
    # absolute url. Note: a cleaner way might be url_for(
    # controller='package', action='resource_download',
    # id=package_id, resource_id=resource_id).
    package_id = resource.as_dict()['package_id']
    resource_dict = resource_dictize(resource, {'model': model})
    resource_dict['package_id'] = package_id

    context = _make_default_context()
    context['resource_dict'] = resource_dict
    celery.send_task(
        'rasterstorer.identify',
        args=[context],
        task_id=task_id
    )

    # Fix: both branches below used to build an identical ResourceIngest —
    # deduplicated so the record is constructed once.
    res_identify = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    if res_identify:
        # The user previously rejected the ingestion workflow but now
        # wants to re-identify the resource: drop the stale record.
        model.Session.delete(res_identify)

    new_res_identify = ResourceIngest(
        task_id,
        resource.id,
        ResourceStorerType.RASTER
    )
    model.Session.add(new_res_identify)
    if res_identify:
        # Original behaviour: commit only when replacing an existing row.
        model.Session.commit()
Exemple #51
0
    def notify(self, entity, operation=None):
        """Domain-object hook: fire registered webhooks for the event.

        Maps resource/package create, update and delete events onto
        webhook topics and queues one 'webhooks.notify_hooks' celery task
        per hook registered for that topic.
        """
        context = {'model': model, 'ignore_auth': True, 'defer_commit': True}

        topic = None
        if isinstance(entity, model.Resource):
            if not operation:
                #This happens on IResourceURLChange, but I'm not sure whether
                #to make this into a webhook.
                return
            elif operation == DomainObjectOperation.new:
                topic = 'resource/create'
            # Fix: this was a bare `if`, so after the 'new' branch set
            # 'resource/create' the `else: return` below discarded it and
            # create events never fired.
            elif operation == DomainObjectOperation.changed:
                topic = 'resource/update'
            elif operation == DomainObjectOperation.deleted:
                topic = 'resource/delete'
            else:
                return

        if isinstance(entity, model.Package):
            if operation == DomainObjectOperation.new:
                topic = 'dataset/create'
            elif operation == DomainObjectOperation.changed:
                topic = 'dataset/update'
            elif operation == DomainObjectOperation.deleted:
                topic = 'dataset/delete'
            else:
                return

        if topic is None:
            # Entity type we don't handle; previously this fell through to
            # an UnboundLocalError on `topic`.
            return

        webhooks = db.Webhook.find(topic=topic)

        for hook in webhooks:
            resource = table_dictize(entity, context)
            webhook = table_dictize(hook, context)
            celery.send_task(
                'webhooks.notify_hooks',
                args=[resource, webhook,
                      config.get('ckan.site_url')],
                task_id='{}-{}'.format(str(uuid.uuid4()), topic))
    def command(self):
        """
        Parse command line arguments and call appropriate method.
        """
        if not self.args or self.args[0] in ['--help', '-h', 'help']:
            print self.usage
            sys.exit(1)

        cmd = self.args[0]
        self._load_config()

        self.log = logging.getLogger(__name__)

        if cmd == 'init':
            import ckan.model as model
            from ckanext.packagezip.model import init_tables
            init_tables(model.meta.engine)
            self.log.info('Package Zip tables are initialized')
        elif cmd == 'create-zip':
            import ckan.model as model
            from ckan.lib.celery_app import celery

            datasets = []
            for name in self.args[1:]:
                name_ = name_stripped_of_url(name)
                dataset = model.Package.get(name_)
                assert dataset, 'Could not find dataset: %s' % name
                datasets.append(dataset)
            assert datasets, 'No datasets to zip!'

            ckan_ini_filepath = os.path.abspath(config['__file__'])
            task_id = str(uuid.uuid4())
            queue = 'priority'

            for dataset in datasets:
                celery.send_task('packagezip.create_zip',
                                 args=[ckan_ini_filepath, dataset.id, queue],
                                 task_id=task_id,
                                 queue=queue)
                self.log.info(u'Queued %s' % dataset.name)
        else:
            self.log.error('Command %s not recognized' % (cmd, ))
Exemple #53
0
    def command(self):
        self._load_config()

        import uuid
        from ckan.lib.celery_app import celery

        if self.verbose:
            print u"Executing all scheduled tasks now"
        task = celery.send_task("ckanext.ytp.tasks.execute_all", task_id=str(uuid.uuid4()))
        if self.verbose:
            print u"Task '%s' set to queue" % unicode(task)
def create_ingest_resource(resource, layer_params):
    """Queue a 'vectorstorer.upload' job publishing an ingested resource.

    :param resource: the resource model object to publish
    :param layer_params: layer parameters forwarded to the worker
    """
    package_id = resource.as_dict()['package_id']
    # Default CKAN context plus datastore connection and layer settings.
    context = _make_default_context()
    context.update({
        'package_id': package_id,
        'db_params': config['ckan.datastore.write_url'],
        'layer_params': layer_params
    })
    geoserver_context = _make_geoserver_context()
    resource_dict = resource_dictize(resource, {'model': model})
    task_id = make_uuid()
    celery.send_task('vectorstorer.upload',
                     args=[resource_dict, context, geoserver_context],
                     task_id=task_id)

    # Mark the existing ingest record published and link the celery task.
    # NOTE(review): .first() may return None if no ResourceIngest row
    # exists, which would raise AttributeError below — confirm the
    # workflow guarantees one.
    res_ingest = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    res_ingest.status = IngestStatus.PUBLISHED
    res_ingest.celery_task_id = task_id
    model.Session.commit()
Exemple #55
0
    def _create_datastorer_task(self, resource):
        """Queue a 'datastorer.upload' celery job and record a task status."""
        site_user = get_action('get_site_user')({
            'model': model,
            'ignore_auth': True,
            'defer_commit': True
        }, {})

        # Everything the worker needs to push data back into CKAN.
        context = {
            'site_url': self._get_site_url(),
            'apikey': site_user.get('apikey'),
            'site_user_apikey': site_user.get('apikey'),
            'username': site_user.get('name'),
        }
        if self.sample_size:
            context['sample_size'] = self.sample_size

        data = resource_dictize(resource, {'model': model})
        task_id = make_uuid()

        # Record the pending task so its progress can be tracked.
        status = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'datastorer',
            'key': u'celery_task_id',
            'value': task_id,
            'last_updated': datetime.now().isoformat()
        }
        get_action('task_status_update')(
            {'model': model, 'user': site_user.get('name')}, status)
        # countdown=15 delays the worker pickup by 15 seconds.
        celery.send_task("datastorer.upload",
                         args=[json.dumps(context),
                               json.dumps(data)],
                         countdown=15,
                         task_id=task_id)
        logger.info('Sent task: datastorer.upload id=%s context=%r' %
                    (task_id, context))
Exemple #56
0
def _celery_task(resource_id, action, tempdir):
    """Queue an 'mvt.process_resource' celery job.

    :param resource_id: id of the resource to process
    :param action: operation name, appended to the generated task id
    :param tempdir: scratch directory handed to the worker
    """
    # S3 credentials and size limit for the MVT worker.
    mvtconfig = {
        'bucket': config.get('ckanext.mvt.s3.bucket'),
        'access_key': config.get('ckanext.mvt.s3.access_key'),
        'secret_key': config.get('ckanext.mvt.s3.secret_key'),
        'max_size': config.get('ckanext.mvt.max_size')
    }
    celery.send_task(
        'mvt.process_resource',
        args=[resource_id,
              config.get('ckan.site_url', 'http://localhost/'),
              model.User.get('default').apikey,
              mvtconfig,
              tempdir,
              action],
        task_id='{}-{}'.format(str(uuid.uuid4()), action)
    )