def create_job(self, message_id, resource):
    """!
    @brief Build a Job that runs Fimex against the given data instance,
    seeding the output fill file from a rendered template when it does not
    already exist.
    @returns a Job object ready for execution.
    """
    job = eva.job.Job(message_id, self.logger)
    job.input_filename = eva.url_to_filename(resource.url)
    job.template_variables = {
        'datainstance': resource,
        'input_filename': os.path.basename(job.input_filename),
        'reference_time': resource.data.productinstance.reference_time,
    }

    # Render the Jinja2 templates and report any errors
    try:
        job.fill_file_template = self.fill_file_template.render(**job.template_variables)
        job.output_filename = self.output_filename.render(**job.template_variables)
    except Exception as e:
        raise eva.exceptions.InvalidConfigurationException(e)

    # Generate Fimex job
    script = [
        '#!/bin/bash',
        '#$ -S /bin/bash',
        "[ ! -f '%(output.fillFile)s' ] && cp -v '%(template)s' '%(output.fillFile)s'",
        "time fimex --input.file '%(input.file)s' --output.fillFile '%(output.fillFile)s'",
    ]
    job.command = ('\n'.join(script) + '\n') % {
        'input.file': job.input_filename,
        'output.fillFile': job.output_filename,
        'template': job.fill_file_template,
    }
    return job
def create_job(self, message_id, resource):
    """!
    @brief Generate a Job which converts GRIB to NetCDF using the
    eva-adapter-support library.
    """
    job = eva.job.Job(message_id, self.logger)
    reference_time = resource.data.productinstance.reference_time

    # Metadata carried over to finish_job() via the Job object.
    job.data = {
        'reftime': reference_time,
        'version': resource.data.productinstance.version,
        'time_period_begin': resource.data.time_period_begin,
        'time_period_end': resource.data.time_period_end,
        'filename': reference_time.strftime(self.env['EVA_OUTPUT_FILENAME_PATTERN']),
    }

    # Collect all substitution values for the conversion script up front.
    command_params = {
        'gribfile': eva.url_to_filename(resource.url),
        'reftime': reference_time.strftime("%Y-%m-%dT%H:%M:%S%z"),
        'lib_fg2nc': self.env['EVA_FG2NC_LIB'],
        'templatedir': self.env['EVA_FG2NC_TEMPLATEDIR'],
        'destfile': job.data['filename'],
    }
    job.command = """#!/bin/bash
#$ -S /bin/bash
{lib_fg2nc}/grib2nc \
    --input "{gribfile}" \
    --output "{destfile}" \
    --reference_time "{reftime}" \
    --template_directory "{templatedir}"
""".format(**command_params)
    return job
def create_job(self, message_id, resource):
    """!
    @brief Return a Job object that will check the file's md5sum against a
    stored hash in a corresponding file.
    """
    job = eva.job.Job(message_id, self.logger)
    job.dataset_filename = eva.url_to_filename(resource.url)
    job.md5_filename = job.dataset_filename + '.md5'
    job.logger.info("Starting verification of file '%s' against md5sum file '%s'.",
                    job.dataset_filename,
                    job.md5_filename)

    script = [
        '#!/bin/bash',
        '#$ -S /bin/bash',  # for GridEngine compatibility
        'set -e',
        'cat %(md5_filename)s',  # for hash detection in generate_resources()
        'printf "%%s %(dataset_filename)s\\n" $(cat %(md5_filename)s) | md5sum --check --status --strict -',
        'rm -fv %(md5_filename)s >&2',
    ]
    job.command = ("\n".join(script) + "\n") % {
        'dataset_filename': job.dataset_filename,
        'md5_filename': job.md5_filename,
    }
    return job
def create_job(self, message_id, resource):
    """!
    @brief Build a Job that runs the CWF processing script on an ECDIS input
    file, followed by an output-recognition shell loop that lists produced
    .nc files (with their NetCDF time variable) and .nml files.
    @returns a Job object, or None when the destination data set already
    exists in Productstatus and processing should be skipped.
    """
    reference_time = resource.data.productinstance.reference_time

    # Skip processing if the destination data set already exists. This
    # disables re-runs and duplicates unless the DataInstance objects are
    # marked as deleted.
    if self.post_to_productstatus():
        qs = self.api.datainstance.objects.filter(data__productinstance__product=self.output_product,
                                                  data__productinstance__reference_time=reference_time,
                                                  servicebackend=self.output_service_backend,
                                                  deleted=False)
        if qs.count() != 0:
            self.logger.warning("Destination data set already exists in Productstatus, skipping processing.")
            return

    job = eva.job.Job(message_id, self.logger)
    job.output_directory_template = self.template.from_string(
        self.env['EVA_CWF_OUTPUT_DIRECTORY_PATTERN']
    )
    job.output_directory = job.output_directory_template.render(
        reference_time=reference_time,
        domain=self.env['EVA_CWF_DOMAIN'],
    )

    cmd = []
    # BUGFIX: the shebang was previously written as '#/bin/bash' (missing
    # the '!'), which is not a valid interpreter directive; all other
    # adapters in this file emit '#!/bin/bash'.
    cmd += ['#!/bin/bash']
    cmd += ['#$ -S /bin/bash']
    if self.env['EVA_CWF_PARALLEL'] > 1:
        cmd += ['#$ -pe mpi-fn %d' % self.env['EVA_CWF_PARALLEL']]
    for module in self.env['EVA_CWF_MODULES']:
        cmd += ['module load %s' % module]
    if self.env['EVA_CWF_PARALLEL'] > 1:
        cmd += ['export ECDIS_PARALLEL=1']
    else:
        cmd += ['export ECDIS_PARALLEL=0']
    cmd += ['export DATE=%s' % reference_time.strftime('%Y%m%d')]
    cmd += ['export DOMAIN=%s' % self.env['EVA_CWF_DOMAIN']]
    cmd += ['export ECDIS=%s' % eva.url_to_filename(resource.url)]
    cmd += ['export ECDIS_TMPDIR=%s' % os.path.join(job.output_directory, 'work')]
    cmd += ['export NDAYS_MAX=%d' % self.env['EVA_CWF_OUTPUT_DAYS']]
    cmd += ['export NREC_DAY_MIN=%d' % self.env['EVA_CWF_INPUT_MIN_DAYS']]
    cmd += ['export OUTDIR=%s' % job.output_directory]
    cmd += ['export UTC=%s' % reference_time.strftime('%H')]
    cmd += ['%s >&2' % self.env['EVA_CWF_SCRIPT_PATH']]

    # Run output recognition
    datestamp_glob = reference_time.strftime('*%Y%m%d_*.*')
    cmd += ['for file in %s; do' % os.path.join(job.output_directory, datestamp_glob)]
    cmd += [' if [[ $file =~ \.nc$ ]]; then']
    cmd += [' echo -n "$file "']
    cmd += [" ncdump -l 1000 -t -v time $file | grep -E '^ ?time\s*='"]
    cmd += [' elif [[ $file =~ \.nml$ ]]; then']
    cmd += [' echo "$file"']
    cmd += [' fi']
    cmd += ['done']

    job.command = "\n".join(cmd) + "\n"
    return job
def create_job(self, message_id, resource):
    """!
    @brief Create a Job object that will copy a file to another destination,
    and optionally post the result to Productstatus.
    """
    job = eva.job.Job(message_id, self.logger)
    job.base_filename = os.path.basename(resource.url)
    job.input_file = eva.url_to_filename(resource.url)
    job.output_url = os.path.join(self.env['EVA_OUTPUT_BASE_URL'], job.base_filename)
    job.output_file = eva.url_to_filename(job.output_url)

    if self.post_to_productstatus():
        job.service_backend = self.api.servicebackend[self.env['EVA_OUTPUT_SERVICE_BACKEND']]
        # check if the destination file already exists
        existing = self.api.datainstance.objects.filter(url=job.output_url,
                                                        servicebackend=job.service_backend,
                                                        data=resource.data,
                                                        format=resource.format)
        if existing.count() != 0:
            job.logger.warning("Destination URL '%s' already exists in Productstatus; this file has already been distributed.", job.output_url)
            return

    job.command = (
        "#!/bin/bash\n"
        "#$ -S /bin/bash\n"  # for GridEngine compatibility
        "`which lfs` cp --verbose %(source)s %(destination)s\n"
    ) % {
        'source': job.input_file,
        'destination': job.output_file,
    }
    return job
def create_job(self, message_id, resource):
    """!
    @brief Download a file, and optionally post the result to Productstatus.
    """
    filename = eva.url_to_filename(resource.url)
    template_variables = {
        'reference_time': resource.data.productinstance.reference_time,
        'datainstance': resource,
    }
    job = eva.job.Job(message_id, self.logger)

    # Render the Jinja2 templates and report any errors
    try:
        job.gridpp_params = {
            'input.file': filename,
            'input.options': self.in_opts.render(**template_variables),
            'output.file': self.output_filename.render(**template_variables),
            'output.options': self.out_opts.render(**template_variables),
            'generic.options': self.generic_opts.render(**template_variables),
        }
    except Exception as e:
        raise eva.exceptions.InvalidConfigurationException(e)

    script = [
        "#!/bin/bash",
        "#$ -S /bin/bash",
        "set -e",
    ]
    script.extend("module load %s" % module for module in self.env['EVA_GRIDPP_MODULES'])
    script.append("cp -v %(input.file)s %(output.file)s" % job.gridpp_params)
    script.append("export OMP_NUM_THREADS=%d" % self.env['EVA_GRIDPP_THREADS'])
    script.append("gridpp %(input.file)s %(input.options)s %(output.file)s %(output.options)s %(generic.options)s" % job.gridpp_params)
    job.command = '\n'.join(script) + '\n'
    return job
def create_job(self, message_id, resource):
    """!
    @brief Example adapter: build a Job that echoes a data conversion
    command for the given resource. Serves as documentation-by-example.
    """
    # Don't write any data to Productstatus.
    self.post_to_productstatus = False

    # Create a string template based on the EVA_OUTPUT_FILENAME_PATTERN
    # environment variable. This allows us to do string substitution and
    # filtering later on.
    output_filename_template = self.template.from_string(
        self.env['EVA_OUTPUT_FILENAME_PATTERN']
    )

    # Run string substitution and filtering. The template language is
    # Jinja2, and available filters can be found in the module
    # `eva.template`.
    #
    # E.g.
    #     {{reference_time|timedelta(hours=6)|iso8601_compact}}
    # when reference_time is April 14th, 2016, 06:00:00 UTC, will yield
    #     20160414T120000Z
    output_filename = output_filename_template.render(
        reference_time=resource.data.productinstance.reference_time,
    )

    # Instantiate a Job object, required if you are going to run an
    # external process, e.g. on GridEngine.
    job = eva.job.Job(message_id, self.logger)

    # The Job object contains a logger object, which you can use to print
    # status or debugging information. DO NOT USE "print", the output will
    # not be recorded in the production environment.
    job.logger.info('Job resource: %s', resource)

    # Here, you write your processing script. There are no environment
    # variables; you must insert your variables using string interpolation.
    #
    # BUGFIX: the shebang must be the very first line of the script (the
    # template previously started with a blank line), and the GridEngine
    # shell directive is '#$ -S', not '#-S' — see the other adapters.
    job.command = """#!/bin/bash
#$ -S /bin/bash
echo convert_my_data \
    --input '%(input)s' \
    --output '%(output)s' \
    --date '%(date)s' \
    --backend '%(backend)s'
"""

    # Interpolate variables into the processing script.
    job.command = job.command % {
        # The input filename always comes from Productstatus, and is always
        # an URL. Use `url_to_filename` to strip away the protocol.
        'input': eva.url_to_filename(resource.url),
        # The output filename has already been put into a variable, now we
        # just supply it to the string interpolation hash.
        'output': output_filename,
        # Our script requires the date and reference hour of the product
        # instance. This information is available from Productstatus. To
        # access it, we traverse the objects until we find the required
        # DateTime object, and then format it using strftime.
        'date': resource.data.productinstance.reference_time.strftime('%Y-%m-%dT%H'),
        # For example purposes, we include more metadata information here.
        # In this example, we include the name of our storage backend.
        'backend': resource.servicebackend.name,
    }

    # You may assign variables to the Job object that can be accessed from
    # finish_job().
    job.output_filename = output_filename

    # Our job is ready for execution. This command will run the job on an
    # Executor object, defined in the environment variable EVA_EXECUTOR. To
    # run jobs on GridEngine, use EVA_EXECUTOR=eva.executor.GridEngineExecutor.
    return job
def test_url_to_filename_wrong_protocol(self):
    """!
    @brief url_to_filename must reject URLs that do not use the file://
    protocol with a RuntimeError.
    """
    self.assertRaises(RuntimeError, eva.url_to_filename, 'https://example.com/foo.nc')
def test_url_to_filename(self):
    """!
    @brief A file:// URL is translated into its plain filesystem path.
    """
    self.assertEqual('/foo/bar/baz.nc', eva.url_to_filename('file:///foo/bar/baz.nc'))