Example #1
0
def upload_rdf(pkg_data, data, data_format):
    """Validate uploaded RDF data and push it to the triple store.

    Progress is reported by writing 'celery_task_status' rows through
    update_task_status().  Returns 0 on any failure.
    NOTE(review): the FINISHED status update is not visible in this
    chunk -- presumably it follows the successful upload; confirm
    against the full file.

    :param pkg_data: dict describing the CKAN package; only 'id' is read here.
    :param data: raw RDF payload to validate and upload.
    :param data_format: serialization format the payload is expected to be in.
    """
    # Task status: RUNNING
    task_info = {
        'entity_id': pkg_data['id'],
        'entity_type': u'package',
        'task_type': u'upload_rdf',
        'key': u'celery_task_status',
        # upload_rdf.request.id is the Celery task id -- this function is
        # presumably registered as a Celery task elsewhere in the file.
        'value': u'%s - %s' % ('RUNNING', unicode(upload_rdf.request.id)),
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    update_task_status(task_info)

    graph = None
    try:
        # Any parse/validation failure is treated as invalid input
        # (the bare except below swallows every exception type).
        graph = validate_rdf_data(data, data_format)
    except:
        # Task status: ERROR -- uploaded payload could not be parsed.
        task_info = {
            'entity_id': pkg_data['id'],
            'entity_type': u'package',
            'task_type': u'upload_rdf',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('ERROR', unicode(upload_rdf.request.id)),
            'error': u'Uploaded data is not valid RDF or it\'s not in the given format (%s).' % data_format,
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
        return 0

    try:
        upload_rdf_data(graph, pkg_data)
    except Exception, e:
        # Task status: ERROR -- parsing succeeded but the upload failed;
        # the caught exception text is stored for the user to inspect.
        task_info = {
            'entity_id': pkg_data['id'],
            'entity_type': u'package',
            'task_type': u'upload_rdf',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('ERROR', unicode(upload_rdf.request.id)),
            'error': u'Could not upload RDF data. %s' % e,
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
        return 0
Example #2
0
def dataset_rdf_crawler():
    # Task status: RUNNING
    task_info = {
        'entity_id': 'GLOBAL',
        'entity_type': u'package',
        'task_type': u'rdf_crawler',
        'key': u'celery_task_status',
        'value': u'%s - %s' % ('RUNNING', unicode(dataset_rdf_crawler.request.id)),
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    update_task_status(task_info)

    try:
        crawl_and_upload_data()

        # Task status: FINISHED
        task_info = {
            'entity_id': 'GLOBAL',
            'entity_type': u'package',
            'task_type': u'rdf_crawler',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('FINISHED', unicode(dataset_rdf_crawler.request.id)),
            'error': u'',
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)

    except Exception, e:
        # Task status: ERROR
        task_info = {
            'entity_id': 'GLOBAL',
            'entity_type': u'package',
            'task_type': u'rdf_crawler',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('ERROR', unicode(dataset_rdf_crawler.request.id)),
            'error': u'Error ocurred while crawling RDF data from CKAN. %s' % e,
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
Example #3
0
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
        return 0

    # Task status: FINISHED
    task_info = {
        'entity_id': pkg_data['id'],
        'entity_type': u'package',
        'task_type': u'upload_rdf',
        'key': u'celery_task_status',
        'value': u'%s - %s' % ('FINISHED', unicode(upload_rdf.request.id)),
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    update_task_status(task_info)

    try:
        pkg = get_package(pkg_data['id'])
        for res in pkg['resources']:
            if 'generated_by_ckanextsparql' in res and res['generated_by_ckanextsparql']:
                res['last_modified'] = datetime.now().isoformat()
                update_resource(res)
                break
    except:
        print 'Unable to update last_modified'

    return 1

@periodic_task(run_every=periodicity)
def dataset_rdf_crawler():