Beispiel #1
0
def uploadDocument(uv_task):
	task_tag = "DOCUMENTCLOUD UPLOAD"
	print "\n\n************** %s [START] ******************\n" % task_tag
	print "uploading doc %s to DocumentCloud" % uv_task.doc_id
	uv_task.setStatus(302)
	
	from lib.Worker.Models.cp_documentcloud_client import CompassDocumentCloudClient
	from lib.Worker.Models.uv_document import UnveillanceDocument
	from conf import DEBUG
	
	document = UnveillanceDocument(_id=uv_task.doc_id)
	if document is None:
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "Document is None"
		uv_task.fail()
		return
	
	if not hasattr(uv_task, "auth_string"):
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "DocumentCloud upload needs an auth string"
		uv_task.fail()
		return
		
	dc_client = CompassDocumentCloudClient(auth_string=uv_task.auth_string)	
	upload = dc_client.upload(document)
	if DEBUG: print upload
	
	if upload is None:
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "DocumentCloud upload needs an auth string"
		uv_task.fail()
		return
		
	document.dc_id = upload['id']
	document.save()
	
	document.addCompletedTask(uv_task.task_path)
	uv_task.finish()
	
	print "\n\n************** %s [END] ******************\n" % task_tag
	print "Uploaded document %s" % document._id
def getAssets(uv_task):
	task_tag = "FETCHING DOCUMENTCLOUD ASSETS"
	print "\n\n************** %s [START] ******************\n" % task_tag
	print "getting DocumentCloud assets for %s" % uv_task.doc_id
	uv_task.setStatus(412)
	
	from lib.Worker.Models.cp_documentcloud_client import CompassDocumentCloudClient
	from lib.Worker.Models.uv_document import UnveillanceDocument
	from conf import DEBUG
	
	document = UnveillanceDocument(_id=uv_task.doc_id)
	
	if document is None:
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "Document is None"
		return
	
	if not hasattr(document, "dc_id"):
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "Document has not document cloud id!"
		return
	
	if not hasattr(uv_task, "auth_string"):
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "DocumentCloud upload needs an auth string"
		return
		
	dc_client = CompassDocumentCloudClient(auth_string=uv_task.auth_string)	
	
	dc_manifest = dc_client.download("documents/%s.json" % document.dc_id)
	if dc_manifest is None:
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		print "No DocumentCloud manifest yet for %s." % document._id
		return
	
	document.addAsset(dc_manifest, "document_cloud_manifest.json", as_literal=False,
		description="description of document on DocumentCloud",
		tags=[ASSET_TAGS['DOC_CLOUD_MANIFEST'], ASSET_TAGS['DOC_CLOUD_DOC']])
		
	dc_entities = dc_client.download("documents/%s/entities.json" % document.dc_id)
	if dc_entities is None:
		print "\n\n************** %s [WARN] ******************\n" % task_tag
		print "No DocumentCloud entiteis yet for %s." % document._id
	else:
		entity_asset = document.addAsset(dc_entities, "document_cloud_entities.json",
			as_literal=False, description="entites pulled from DocumentCloud",
			tags=[ASSET_TAGS['DOC_CLOUD_ENTITIES'], ASSET_TAGS['DOC_CLOUD_DOC']])
		
		from lib.Worker.Models.uv_text import UnveillanceText
		
		if not hasattr(document, "text_id"):
			text = UnveillanceText(inflate={
				'file_name' : entity_asset,
				'entities' : dc_entities['entities'],
				'media_id' : document._id
			})
			
			document.text_id = text._id
			document.save()
		else:
			text = UnveillanceText(_id=document.text_id)
			text.entities = dc_entities['entities']
			text.save()
		
	document.addCompletedTask(uv_task.task_path)
	uv_task.finish()