Example #1
0
def cleanErrItem(item_id, count):
	"""Best-effort removal of everything stored for a failed item.

	Runs three independent cleanup steps:

	1. deletes the item's ``.jp2`` keys from the bucket returned by
	   ``getBucket()``,
	2. deletes the item's document from the CloudSearch item domain
	   (skipped when ``CLOUDSEARCH_ITEM_DOMAIN`` is ``None``),
	3. deletes the ``Item`` record.

	A failure in one step is reported on stdout and does not stop the
	remaining steps; ordinary errors are never propagated to the caller.
	``KeyboardInterrupt`` / ``SystemExit`` are deliberately NOT swallowed.

	item_id -- identifier of the item to clean up
	count   -- number of image files to remove; the first is
	           ``<item_id>.jp2``, the others ``<item_id>/<n>.jp2`` (n >= 1)
	"""
	import traceback

	def report(step):
		# Single-argument print(...) works the same as the print statement
		# used elsewhere in this file.
		print("Cleanup of item '%s' failed (%s):\n%s" % (item_id, step, traceback.format_exc()))

	try:
		bucket = getBucket()

		filenames = []
		if count > 0:
			filenames.append('%s.jp2' % item_id)
		filenames.extend('%s/%s.jp2' % (item_id, n) for n in range(1, count))

		# multi-image items also own a "folder" key that has to go
		if count > 1:
			filenames.append('%s/' % item_id)

		for filename in filenames:
			bucket.delete_key(S3_DEFAULT_FOLDER + filename)
	except Exception:
		report('bucket')

	try:
		# no search domain configured -> nothing was ever indexed
		if CLOUDSEARCH_ITEM_DOMAIN is not None:
			cloudsearch = getCloudSearch(CLOUDSEARCH_ITEM_DOMAIN, 'document')
			cloudsearch.delete(hashlib.sha512(item_id).hexdigest()[:128])
			cloudsearch.commit()
	except Exception:
		report('cloudsearch')

	try:
		Item(item_id).delete()
	except Exception:
		report('item record')

	return
Example #2
0
def finalizeItem(batch_id, item_id, item_tasks_count):
	"""Finish processing of one item once all of its tasks have been handled.

	Loads the item's tasks (orders ``0 .. item_tasks_count - 1``), merges
	their image metadata into the item data carried by the last task,
	mirrors the result into CloudSearch (when ``CLOUDSEARCH_ITEM_DOMAIN`` is
	set) and finally saves, deletes or cleans up the item.

	batch_id         -- batch the tasks belong to
	item_id          -- identifier of the item being finalized
	item_tasks_count -- number of tasks of the item; must be >= 1, otherwise
	                    looking up the last task raises IndexError

	Returns the result of ``ingestQueue.apply_async`` when a failed
	CloudSearch update is rescheduled, otherwise None.
	"""
	item_tasks = [Task(batch_id, item_id, task_order) for task_order in range(0, item_tasks_count)]

	# the task with highest id for the specific item has all item data
	last_task = item_tasks[-1]
	item_data = last_task.item_data
	item_data['timestamp'] = datetime.utcnow().isoformat("T") + "Z"

	whole_item_delete = 'status' in item_data and item_data['status'] == 'deleted'

	try:
		old_item = Item(item_id)
	except Exception:
		# presumably the item does not exist yet -- treated as "no old item"
		old_item = None

	if old_item:
		if not whole_item_delete:
			item_data['image_meta'] = old_item.image_meta
	else:
		item_data['image_meta'] = {}

	# True when an already stored item is being removed as a whole
	delete_existing = bool(old_item and whole_item_delete)

	error = False

	if not whole_item_delete:
		# Do not stop at the first failed task: image_meta must still be
		# brought up to date because its size drives the cleanup below.
		for task in item_tasks:
			if task.status == 'pending' or task.status == 'error':
				error = True
			# modification tasks never changes image_meta
			elif task.type == 'mod':
				pass
			elif task.status == 'deleted':
				# if the image is being really deleted not only being reshuffled
				if task.url not in item_data['url']:
					item_data['image_meta'].pop(task.url, None)
			elif task.status == 'ok':
				item_data['image_meta'][task.url] = task.image_meta

	if error:
		cleanErrItem(item_id, len(item_data['image_meta']))
		print("Item '%s' failed" % item_id)
		return

	if not delete_existing:
		item = Item(item_id, item_data)
		ordered_image_meta = []

		# NOTE(review): for a whole-item delete of an unknown item,
		# image_meta is empty, so this lookup raises KeyError if item.url
		# is not empty -- confirm that case cannot happen.
		for url in item.url:
			# NOTE(review): this writes 'url' into the item's own
			# image_meta entries, not into a copy -- confirm intended.
			tmp = item.image_meta[url]
			tmp['url'] = url
			ordered_image_meta.append(tmp)

	if CLOUDSEARCH_ITEM_DOMAIN is not None:
		try:
			cloudsearch = getCloudSearch(CLOUDSEARCH_ITEM_DOMAIN, 'document')
			document_id = hashlib.sha512(item_id).hexdigest()[:128]

			if delete_existing:
				cloudsearch.delete(document_id)
			else:
				cloudsearch.add(document_id, {
					'id': item.id,
					'title': item.title,
					'creator': item.creator,
					'source': item.source,
					'institution': item.institution,
					'institution_link': item.institution_link,
					'license': item.license,
					'description': item.description,
					'url': json.dumps(item.url),
					'timestamp': item.timestamp,
					'image_meta': json.dumps(ordered_image_meta),
				})

			cloudsearch.commit()

		except Exception:
			if last_task.attempts < MAX_TASK_REPEAT * 2:
				# Report via last_task: the loop variable `task` is unbound
				# when the status loop above was skipped (whole-item
				# delete), which used to raise NameError right here.
				print('\nFailed Cloud Search attempt numb.: %s\nItem: %s\nError message:\n###\n%s###' % (last_task.attempts + 1, last_task.item_id, traceback.format_exc()))
				last_task.attempts += 1
				last_task.status = 'pending'
				last_task.type = 'cloud_search'
				last_task.save()
				# retry delay in seconds: grows with the attempt number,
				# with a random part so retries do not all fire together
				rand = (last_task.attempts * 60) + random.randint(last_task.attempts * 60, last_task.attempts * 60 * 2)

				return ingestQueue.apply_async(args=[batch_id, item_id, last_task.task_id], countdown=rand)
			else:
				# retries exhausted: mark the item as failed for good
				last_task.status = 'error'
				last_task.message = ERR_MESSAGE_CLOUDSEARCH
				last_task.save()

	if last_task.status == 'error':
		cleanErrItem(item_id, len(item_data['image_meta']))
		print("Item '%s' failed" % item_id)
	elif delete_existing:
		old_item.delete()
		print("Item '%s' deleted" % item_id)
	else:
		item.save()
		print("Item '%s' finalized" % item_id)

	return