# Imports needed by this excerpt. `periodicity` and the helper functions
# (update_task_status, validate_rdf_data, upload_rdf_data,
# crawl_and_upload_data, get_package, update_resource) are defined
# elsewhere in the module.
from datetime import datetime

from celery.task import task, periodic_task


# NOTE: the original task decorator for upload_rdf falls outside this
# excerpt; @task is assumed here so that upload_rdf.request.id resolves.
@task
def upload_rdf(pkg_data, data, data_format):
    """Validate and upload RDF data for a package, reporting task status."""
    # Task status: RUNNING
    task_info = {
        'entity_id': pkg_data['id'],
        'entity_type': u'package',
        'task_type': u'upload_rdf',
        'key': u'celery_task_status',
        'value': u'%s - %s' % ('RUNNING', unicode(upload_rdf.request.id)),
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    update_task_status(task_info)

    graph = None
    try:
        graph = validate_rdf_data(data, data_format)
    except Exception:
        # Task status: ERROR
        task_info = {
            'entity_id': pkg_data['id'],
            'entity_type': u'package',
            'task_type': u'upload_rdf',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('ERROR', unicode(upload_rdf.request.id)),
            'error': u'Uploaded data is not valid RDF or it is not in the '
                     u'given format (%s).' % data_format,
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
        return 0

    try:
        upload_rdf_data(graph, pkg_data)
    except Exception as e:
        # Task status: ERROR
        task_info = {
            'entity_id': pkg_data['id'],
            'entity_type': u'package',
            'task_type': u'upload_rdf',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('ERROR', unicode(upload_rdf.request.id)),
            'error': u'Could not upload RDF data. %s' % e,
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
        return 0

    # Task status: FINISHED
    task_info = {
        'entity_id': pkg_data['id'],
        'entity_type': u'package',
        'task_type': u'upload_rdf',
        'key': u'celery_task_status',
        'value': u'%s - %s' % ('FINISHED', unicode(upload_rdf.request.id)),
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    update_task_status(task_info)

    # Touch the last_modified timestamp of the resource generated by this
    # extension so the package reflects the new upload.
    try:
        pkg = get_package(pkg_data['id'])
        for res in pkg['resources']:
            if 'generated_by_ckanextsparql' in res and res['generated_by_ckanextsparql']:
                res['last_modified'] = datetime.now().isoformat()
                update_resource(res)
                break
    except Exception:
        print('Unable to update last_modified')

    return 1


@periodic_task(run_every=periodicity)
def dataset_rdf_crawler():
    """Periodically crawl RDF data from CKAN and upload it."""
    # Task status: RUNNING
    task_info = {
        'entity_id': 'GLOBAL',
        'entity_type': u'package',
        'task_type': u'rdf_crawler',
        'key': u'celery_task_status',
        'value': u'%s - %s' % ('RUNNING', unicode(dataset_rdf_crawler.request.id)),
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    update_task_status(task_info)

    try:
        crawl_and_upload_data()
        # Task status: FINISHED
        task_info = {
            'entity_id': 'GLOBAL',
            'entity_type': u'package',
            'task_type': u'rdf_crawler',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('FINISHED', unicode(dataset_rdf_crawler.request.id)),
            'error': u'',
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
    except Exception as e:
        # Task status: ERROR
        task_info = {
            'entity_id': 'GLOBAL',
            'entity_type': u'package',
            'task_type': u'rdf_crawler',
            'key': u'celery_task_status',
            'value': u'%s - %s' % ('ERROR', unicode(dataset_rdf_crawler.request.id)),
            'error': u'Error occurred while crawling RDF data from CKAN. %s' % e,
            'last_updated': datetime.now().isoformat()
        }
        update_task_status(task_info)
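

# --- Usage sketch (not part of the original module) ------------------------
# A minimal illustration of how upload_rdf could be queued and its progress
# read back. It assumes the task is registered with the site's Celery app and
# that the status rows written by update_task_status() are exposed through
# CKAN's task_status_show action; context/auth handling is omitted.
def example_enqueue_and_poll(pkg_data, data, data_format):
    # delay() is standard Celery API for asynchronous invocation.
    result = upload_rdf.delay(pkg_data, data, data_format)

    # Poll CKAN for the status string the task reports ('RUNNING - <id>',
    # 'FINISHED - <id>' or 'ERROR - <id>').
    from ckan.logic import get_action
    status = get_action('task_status_show')({'ignore_auth': True}, {
        'entity_id': pkg_data['id'],
        'task_type': u'upload_rdf',
        'key': u'celery_task_status',
    })
    return result, status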