import os
import shutil
from datetime import datetime, timedelta

# transaction.atomic and select_for_update indicate Django's ORM
from django.db import transaction, OperationalError

# Project-specific helpers (log, app, GlobalConfig, LocalDataLocation,
# MetadataUpdateRequest, check_file_exists, get_file_path, download_data,
# update_request_status, update_file_metadata) are assumed to be in scope.


def sanitize_local_data_location():
	log.debug("sanitize_local_data_location")
	# Check that the data_cache_path directory exists and is not empty:
	# if the NFS share is not mounted, the cache appears empty and every
	# LocalDataLocation row would wrongly be deleted.
	cache_path = GlobalConfig.get_or_fail("data_cache_path")
	if not os.path.exists(cache_path) or not os.listdir(cache_path):
		raise Exception("Cancelling sanitize_local_data_location, suspecting nfs storage not mounted: %s is empty" % cache_path)
	
	data_location_ids = LocalDataLocation.objects.values_list("id", flat=True)
	# For each registered local data location, check that the file still exists
	# Rows are locked one by one to minimize service interruption
	for data_location_id in data_location_ids:
		try:
			with transaction.atomic():
				try:
					data_location = LocalDataLocation.objects.select_for_update(nowait=True).get(id=data_location_id)
				except LocalDataLocation.DoesNotExist:
					# The row was deleted since the id list was built; nothing to do
					pass
				else:
					if not check_file_exists(data_location.path):
						log.info("Cleaning up LocalDataLocation, missing file for %s", data_location)
						data_location.delete()
		except OperationalError as why:
			log.warning("Could not lock database rows for LocalDataLocation: %s", why)
def execute_metadata_update_requests():
	log.debug("execute_metadata_update_requests")
	
	request_timeout = GlobalConfig.get("metadata_update_request_timeout", timedelta(days=1))
	
	# Only one of these should run at any time
	# So try to open a transaction and lock the rows in nowait
	try:
		with transaction.atomic():
			for request in MetadataUpdateRequest.objects.select_for_update(nowait=True).all():
				if request.status == "NEW":
					update_request_status(request, "RUNNING")
					# If the recnum is unchanged there is nothing to update
					if request.old_recnum != request.recnum:
						# It is possible the file is not stored locally
						try:
							current_data_location = LocalDataLocation.objects.get(recnum=request.old_recnum)
						except LocalDataLocation.DoesNotExist:
							log.debug("Trying to update meta-data for recnum %s but no data location found", request.old_recnum)
							update_request_status(request, "DONE")
						else:
							# If the file is not really on disk, we cleanup
							if not check_file_exists(current_data_location.path):
								log.info("Cleaning up LocalDataLocation, missing file for %s", data_location)
								current_data_location.delete()
								update_request_status(request, "DONE")
							else:
								# Because meta-data is written in the file, we need to make a copy of the file to break hard links and give it a new name
								new_local_file_path = LocalDataLocation.create_location(request)
								try:
									shutil.copyfile(current_data_location.path, new_local_file_path)
								except IOError as why:
									log.error("Could not copy file %s to %s: %s", current_data_location.path, new_local_file_path, why)
									app.mail_admins("Meta-data update request error", "Request %s\nCould not copy file %s to %s: %s" % (request, current_data_location.path, new_local_file_path, why))
									update_request_status(request, "ERROR")
								else:
									current_data_location.delete()
									update_file_metadata.apply_async((request, ), link=update_request_status.si(request, "DONE"))
			
				# If the request is running for too long there could be a problem
				elif request.status == "RUNNING" and request.updated + request_timeout < datetime.now():
					update_request_status(request, "TIMEOUT")
					app.mail_admins("Request timeout", "The metadata_update request %s has been running since %s and passed it's timeout %s" % (request.id, request.updated, request_timeout))
def get_data(request):
	log.debug("get_data %s", request)
	# Try to get the file path from the local data site, otherwise download the data
	try:
		file_path = get_file_path(request, local_data_site=True)
		log.debug("File for request %s already in cache", request)
	except Exception:
		# No local data location is registered yet, so fetch the file
		log.debug("Downloading file for request %s", request)
		download_data(request)
		file_path = get_file_path(request, local_data_site=True)
	else:
		# Check that the file really exists on disk
		if not check_file_exists(file_path):
			log.debug("File for request %s in DB but not on disk, missing %s. Downloading.", request, file_path)
			download_data(request)
			file_path = get_file_path(request, local_data_site=True)
		
		# If file exists, update the expiration date (in case it is later than current one)
		elif request.expiration_date:
			LocalDataLocation.update_expiration_date(request, request.expiration_date)
	
	return file_path
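
# A sketch of a typical call site: get_data hides whether the file came from
# the cache or had to be downloaded. The data_request object and process()
# below are hypothetical placeholders:
#
# 	file_path = get_data(data_request)
# 	with open(file_path, "rb") as f:
# 		process(f)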
import pytest

from tasks import check_file_exists


def test_check_file_does_not_exists_case():
    assert check_file_exists('.\\akom.json') is False
def test_check_file_exists_case():
    assert check_file_exists('example/config.json') is True
def test_check_empty_filename_case():
    with pytest.raises(TypeError):
        check_file_exists()
def test_check_file_does_not_exists_case_2():
    assert check_file_exists('123') is False
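
# The tests above pin down the contract of check_file_exists: a missing path
# returns False, an existing file returns True, and omitting the argument
# raises TypeError (the path is a required positional parameter). A minimal
# sketch consistent with that contract; the real implementation in tasks.py
# may differ:
import os

def check_file_exists(file_path):
    # os.path.isfile is False for missing paths and for directories
    return os.path.isfile(file_path)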
import sys
import logging.handlers
import tasks
from python_json_config import ConfigBuilder

# define path to config file (first command line argument)
config_file = '.\\' + sys.argv[1]

# create config parser
builder = ConfigBuilder()

# parse config; abort early so the logger setup below never sees an undefined config
if tasks.check_file_exists(config_file):
    config = builder.parse_config(config_file)
else:
    sys.exit("Config file not found: %s" % config_file)

# create logger
logger = logging.getLogger(config.site_name)
logger.setLevel(logging.DEBUG)

# create file handler which logs messages
fh = logging.handlers.RotatingFileHandler(config.logpath + config.site_name +
                                          '.log',
                                          maxBytes=10500000,
                                          backupCount=5)
fh.setLevel(logging.DEBUG)

# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
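
# The handlers are created but not yet attached to the logger; the customary
# final step, with an assumed format string:
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
logger.addHandler(fh)
logger.addHandler(ch)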