Code example #1
0
File: fimex_fill_file.py  Project: metno/EVA
    def create_job(self, message_id, resource):
        """!
        @brief Build a Job that runs Fimex against a rendered fill-file
        template, copying the template into place first if needed.
        """
        job = eva.job.Job(message_id, self.logger)

        job.input_filename = eva.url_to_filename(resource.url)
        job.template_variables = {
            'datainstance': resource,
            'input_filename': os.path.basename(job.input_filename),
            'reference_time': resource.data.productinstance.reference_time,
        }

        # Render the Jinja2 templates; any rendering failure is reported as a
        # configuration problem.
        try:
            job.fill_file_template = self.fill_file_template.render(**job.template_variables)
            job.output_filename = self.output_filename.render(**job.template_variables)
        except Exception as e:
            raise eva.exceptions.InvalidConfigurationException(e)

        # Assemble the Fimex wrapper script and interpolate the filenames.
        script_lines = [
            '#!/bin/bash',
            '#$ -S /bin/bash',
            "[ ! -f '%(output.fillFile)s' ] && cp -v '%(template)s' '%(output.fillFile)s'",
            "time fimex --input.file '%(input.file)s' --output.fillFile '%(output.fillFile)s'",
        ]
        interpolation = {
            'input.file': job.input_filename,
            'output.fillFile': job.output_filename,
            'template': job.fill_file_template,
        }
        job.command = ('\n'.join(script_lines) + '\n') % interpolation

        return job
Code example #2
0
File: fimex_grib_to_netcdf.py  Project: metno/EVA
    def create_job(self, message_id, resource):
        """!
        @brief Generate a Job which converts GRIB to NetCDF using the
        eva-adapter-support library.
        """
        job = eva.job.Job(message_id, self.logger)

        reftime = resource.data.productinstance.reference_time

        # Metadata attached to the job for later use (presumably read back in
        # finish_job() — confirm against the adapter base class).
        job.data = {
            'reftime': reftime,
            'version': resource.data.productinstance.version,
            'time_period_begin': resource.data.time_period_begin,
            'time_period_end': resource.data.time_period_end,
            'filename': reftime.strftime(self.env['EVA_OUTPUT_FILENAME_PATTERN']),
        }

        # Single-line grib2nc invocation; all parameters are quoted.
        grib2nc = '{lib_fg2nc}/grib2nc --input "{gribfile}" --output "{destfile}" --reference_time "{reftime}" --template_directory "{templatedir}"'.format(
            lib_fg2nc=self.env['EVA_FG2NC_LIB'],
            gribfile=eva.url_to_filename(resource.url),
            destfile=job.data['filename'],
            reftime=reftime.strftime("%Y-%m-%dT%H:%M:%S%z"),
            templatedir=self.env['EVA_FG2NC_TEMPLATEDIR'],
        )
        job.command = '\n'.join(['#!/bin/bash', '#$ -S /bin/bash', grib2nc]) + '\n'

        return job
Code example #3
0
File: checksum.py  Project: metno/EVA
    def create_job(self, message_id, resource):
        """!
        @brief Return a Job object that will check the file's md5sum against a
        stored hash in a corresponding file.
        """
        job = eva.job.Job(message_id, self.logger)
        job.dataset_filename = eva.url_to_filename(resource.url)
        job.md5_filename = job.dataset_filename + '.md5'
        job.logger.info("Starting verification of file '%s' against md5sum file '%s'.", job.dataset_filename, job.md5_filename)

        # The md5sum file is printed first so its hash appears in the job
        # output (used for hash detection in generate_resources()); on a
        # successful check the md5sum file is removed.
        script = (
            '#!/bin/bash\n'
            '#$ -S /bin/bash\n'
            'set -e\n'
            'cat {md5_filename}\n'
            'printf "%s  {dataset_filename}\\n" $(cat {md5_filename}) | md5sum --check --status --strict -\n'
            'rm -fv {md5_filename} >&2\n'
        )
        job.command = script.format(
            dataset_filename=job.dataset_filename,
            md5_filename=job.md5_filename,
        )
        return job
Code example #4
0
File: cwf.py  Project: metno/EVA
    def create_job(self, message_id, resource):
        """!
        @brief Generate a Job that runs the ECDIS/CWF processing script for
        the given resource, unless the destination data set already exists.
        @returns a Job object, or None when processing is skipped.
        """
        reference_time = resource.data.productinstance.reference_time

        # Skip processing if the destination data set already exists. This
        # disables re-runs and duplicates unless the DataInstance objects are
        # marked as deleted.
        if self.post_to_productstatus():
            qs = self.api.datainstance.objects.filter(data__productinstance__product=self.output_product,
                                                      data__productinstance__reference_time=reference_time,
                                                      servicebackend=self.output_service_backend,
                                                      deleted=False)
            if qs.count() != 0:
                self.logger.warning("Destination data set already exists in Productstatus, skipping processing.")
                return

        job = eva.job.Job(message_id, self.logger)
        job.output_directory_template = self.template.from_string(
            self.env['EVA_CWF_OUTPUT_DIRECTORY_PATTERN']
        )
        job.output_directory = job.output_directory_template.render(
            reference_time=reference_time,
            domain=self.env['EVA_CWF_DOMAIN'],
        )

        parallel = self.env['EVA_CWF_PARALLEL'] > 1

        cmd = []
        # BUGFIX: the shebang previously read '#/bin/bash' (missing '!') and
        # was therefore ignored by the kernel; all sibling adapters use
        # '#!/bin/bash'.
        cmd += ['#!/bin/bash']
        cmd += ['#$ -S /bin/bash']
        if parallel:
            # Request a GridEngine parallel environment before module loads.
            cmd += ['#$ -pe mpi-fn %d' % self.env['EVA_CWF_PARALLEL']]
        for module in self.env['EVA_CWF_MODULES']:
            cmd += ['module load %s' % module]
        cmd += ['export ECDIS_PARALLEL=%d' % (1 if parallel else 0)]
        cmd += ['export DATE=%s' % reference_time.strftime('%Y%m%d')]
        cmd += ['export DOMAIN=%s' % self.env['EVA_CWF_DOMAIN']]
        cmd += ['export ECDIS=%s' % eva.url_to_filename(resource.url)]
        cmd += ['export ECDIS_TMPDIR=%s' % os.path.join(job.output_directory, 'work')]
        cmd += ['export NDAYS_MAX=%d' % self.env['EVA_CWF_OUTPUT_DAYS']]
        cmd += ['export NREC_DAY_MIN=%d' % self.env['EVA_CWF_INPUT_MIN_DAYS']]
        cmd += ['export OUTDIR=%s' % job.output_directory]
        cmd += ['export UTC=%s' % reference_time.strftime('%H')]
        cmd += ['%s >&2' % self.env['EVA_CWF_SCRIPT_PATH']]

        # Run output recognition: print each produced NetCDF file together
        # with its time variable, and each namelist file, so the output can be
        # parsed from the job log (presumably by generate_resources() — TODO
        # confirm). Raw strings keep the backslashes intended for the shell.
        datestamp_glob = reference_time.strftime('*%Y%m%d_*.*')
        cmd += ['for file in %s; do' % os.path.join(job.output_directory, datestamp_glob)]
        cmd += [r'    if [[ $file =~ \.nc$ ]]; then']
        cmd += ['        echo -n "$file "']
        cmd += [r"        ncdump -l 1000 -t -v time $file | grep -E '^ ?time\s*='"]
        cmd += [r'    elif [[ $file =~ \.nml$ ]]; then']
        cmd += ['        echo "$file"']
        cmd += ['    fi']
        cmd += ['done']

        job.command = "\n".join(cmd) + "\n"

        return job
Code example #5
0
File: distribution.py  Project: metno/EVA
    def create_job(self, message_id, resource):
        """!
        @brief Create a Job object that will copy a file to another
        destination, and optionally post the result to Productstatus.
        @returns a Job object, or None when the file was already distributed.
        """
        job = eva.job.Job(message_id, self.logger)
        job.base_filename = os.path.basename(resource.url)
        job.input_file = eva.url_to_filename(resource.url)
        job.output_url = os.path.join(self.env['EVA_OUTPUT_BASE_URL'], job.base_filename)
        job.output_file = eva.url_to_filename(job.output_url)

        if self.post_to_productstatus():
            job.service_backend = self.api.servicebackend[self.env['EVA_OUTPUT_SERVICE_BACKEND']]
            # Abort early if the destination file is already registered.
            existing = self.api.datainstance.objects.filter(url=job.output_url,
                                                            servicebackend=job.service_backend,
                                                            data=resource.data,
                                                            format=resource.format)
            if existing.count() != 0:
                job.logger.warning("Destination URL '%s' already exists in Productstatus; this file has already been distributed.", job.output_url)
                return

        # Copy via `lfs cp`; when lfs is not on PATH the backtick expansion is
        # empty and plain cp runs instead.
        job.command = "\n".join([
            "#!/bin/bash",
            "#$ -S /bin/bash",  # for GridEngine compatibility
            "`which lfs` cp --verbose %s %s" % (job.input_file, job.output_file),
        ]) + "\n"

        return job
Code example #6
0
File: gridpp.py  Project: metno/EVA
    def create_job(self, message_id, resource):
        """!
        @brief Download a file, and optionally post the result to Productstatus.
        """
        # Resolve the input outside the try block so protocol errors keep
        # their original exception type.
        input_file = eva.url_to_filename(resource.url)
        template_variables = {
            'reference_time': resource.data.productinstance.reference_time,
            'datainstance': resource,
        }

        job = eva.job.Job(message_id, self.logger)

        # Render the Jinja2 templates; failures are configuration errors.
        try:
            job.gridpp_params = {
                'input.file': input_file,
                'input.options': self.in_opts.render(**template_variables),
                'output.file': self.output_filename.render(**template_variables),
                'output.options': self.out_opts.render(**template_variables),
                'generic.options': self.generic_opts.render(**template_variables),
            }
        except Exception as e:
            raise eva.exceptions.InvalidConfigurationException(e)

        # Copy the input into place, then post-process it with gridpp.
        script = [
            "#!/bin/bash",
            "#$ -S /bin/bash",
            "set -e",
        ]
        script += ["module load %s" % module for module in self.env['EVA_GRIDPP_MODULES']]
        script += [
            "cp -v %(input.file)s %(output.file)s" % job.gridpp_params,
            "export OMP_NUM_THREADS=%d" % self.env['EVA_GRIDPP_THREADS'],
            "gridpp %(input.file)s %(input.options)s %(output.file)s %(output.options)s %(generic.options)s" % job.gridpp_params,
        ]
        job.command = '\n'.join(script) + '\n'

        return job
Code example #7
0
File: example.py  Project: metno/EVA
    def create_job(self, message_id, resource):

        # Don't write any data to Productstatus.
        self.post_to_productstatus = False

        # Create a string template based on the EVA_OUTPUT_FILENAME_PATTERN
        # environment variable. This allows us to do string substitution and
        # filtering later on.
        output_filename_template = self.template.from_string(
            self.env['EVA_OUTPUT_FILENAME_PATTERN']
        )

        # Run string substitution and filtering. The template language is
        # Jinja2, and available filters can be found in the module
        # `eva.template`.
        #
        # E.g.
        #  {{reference_time|timedelta(hours=6)|iso8601_compact}}
        # when reference_time is April 14th, 2016, 06:00:00 UTC, will yield
        #  20160414T120000Z
        output_filename = output_filename_template.render(
            reference_time=resource.data.productinstance.reference_time,
        )

        # Instantiate a Job object, required if you are going to run an
        # external process, e.g. on GridEngine.
        job = eva.job.Job(message_id, self.logger)

        # The Job object contains a logger object, which you can use to print
        # status or debugging information. DO NOT USE "print", the output will
        # not be recorded in the production environment.
        #
        # Please read the Python logging tutorial:
        # https://docs.python.org/2/howto/logging.html#logging-basic-tutorial
        job.logger.info('Job resource: %s', resource)

        # Here, you write your processing script. There are no environment
        # variables; you must insert your variables using string interpolation.
        job.command = """
#!/bin/bash
#-S /bin/bash
echo convert_my_data \
    --input '%(input)s' \
    --output '%(output)s' \
    --date '%(date)s' \
    --backend '%(backend)s'
"""

        # Interpolate variables into the processing script.
        job.command = job.command % {

            # The input filename always comes from Productstatus, and is always
            # an URL. Use `url_to_filename` to strip away the protocol.
            'input': eva.url_to_filename(resource.url),

            # The output filename has already been put into a variable, now we
            # just supply it to the string interpolation hash.
            'output': output_filename,

            # Our script requires the date and reference hour of the product
            # instance. This information is available from Productstatus. To
            # access it, we traverse the objects until we find the required
            # DateTime object, and then format it using strftime.
            'date': resource.data.productinstance.reference_time.strftime('%Y-%m-%dT%H'),

            # For example purposes, we include more metadata information here.
            # In this example, we include the name of our storage backend.
            'backend': resource.servicebackend.name,

        }

        # You may assign variables to the Job object that can be accessed from finish_job().
        job.output_filename = output_filename

        # Our job is ready for execution. This command will run the job on an
        # Executor object, defined in the environment variable EVA_EXECUTOR. To
        # run jobs on GridEngine, use EVA_EXECUTOR=eva.executor.GridEngineExecutor.
        return job
Code example #8
0
File: test_base.py  Project: metno/EVA
 def test_url_to_filename_wrong_protocol(self):
     """!
     @brief url_to_filename() must reject URLs whose scheme is not file://.
     """
     self.assertRaises(RuntimeError, eva.url_to_filename, 'https://example.com/foo.nc')
Code example #9
0
File: test_base.py  Project: metno/EVA
 def test_url_to_filename(self):
     """!
     @brief A file:// URL is translated to its local filesystem path.
     """
     self.assertEqual(eva.url_to_filename('file:///foo/bar/baz.nc'), '/foo/bar/baz.nc')