Example #1
0
 def test_finish_job_failed(self):
     """!
     @brief Test that the adapter skips failed jobs.

     A job built from a mocked resource is marked as FAILED and handed to
     finish_job(); the test passes as long as that call does not raise.
     """
     self.create_adapter()
     failed_job = self.create_job(mock.MagicMock())
     failed_job.set_status(eva.job.FAILED)
     self.adapter.finish_job(failed_job)
Example #2
0
    def sync(self, job):
        """!
        @brief Poll Grid Engine for job completion.

        Runs the qacct command for `job.pid` over SSH. A non-zero exit code
        from that command is treated as "job not finished yet": the next poll
        time is pushed forward and False is returned. Otherwise the job exit
        code is parsed from the qacct output, job metrics are submitted to
        statsd, stdout and stderr are fetched over SFTP, the job status is set
        to COMPLETE or FAILED, the output is logged, and the remote script,
        stdout and stderr files are removed on a best-effort basis.

        @param job The job to poll. Reads `pid`, `stdout_path`, `stderr_path`
                   and `submit_script_path`; writes `exit_code`, `stdout`,
                   `stderr` and the job status.
        @returns False if the job has not completed yet, otherwise None.
        @throws eva.exceptions.RetryException if the SSH connection or the
                qacct command fails with one of SSH_RETRY_EXCEPTIONS, or if
                the stdout/stderr files cannot be retrieved.
        """

        # Create SSH connection
        try:
            self.ensure_ssh_connection(job)
        except SSH_RETRY_EXCEPTIONS as e:
            raise eva.exceptions.RetryException(e)

        # Poll for job completion
        check_command = self.create_qacct_command(job.pid)
        try:
            job.logger.debug('Running: %s', check_command)
            exit_code, stdout, stderr = self.execute_ssh_command(check_command)
        except SSH_RETRY_EXCEPTIONS as e:
            raise eva.exceptions.RetryException(e)
        if exit_code != EXIT_OK:
            job.logger.debug('Job %d has not completed yet.', job.pid)
            job.set_next_poll_time(QACCT_CHECK_INTERVAL_MSECS)
            return False
        job.exit_code = get_exit_code_from_qacct_output(stdout)

        # Submit job metrics
        stats = parse_qacct_metrics(stdout.splitlines())
        for metric, value in stats['metrics'].items():
            self.statsd.timing(metric, value, stats['tags'])

        # Retrieve stdout and stderr
        try:
            with self.sftp_client.open(job.stdout_path, 'r') as f:
                job.stdout = eva.executor.strip_stdout_newlines(f.readlines())
            with self.sftp_client.open(job.stderr_path, 'r') as f:
                job.stderr = eva.executor.strip_stdout_newlines(f.readlines())
        except SSH_RETRY_EXCEPTIONS + (IOError,) as e:
            # Include the underlying error so that the retry reason is
            # visible to whoever handles the RetryException.
            raise eva.exceptions.RetryException(
                'Unable to retrieve stdout and stderr from finished Grid Engine job: %s' % e
            )

        # Set job exit status
        if job.exit_code == EXIT_OK:
            job.set_status(eva.job.COMPLETE)
        else:
            job.set_status(eva.job.FAILED)

        # Print stdout and stderr
        eva.executor.log_stdout_stderr(job, job.stdout, job.stderr)

        # Remove job script, stdout, and stderr caches. Each file is removed
        # on its own so that a failure on one of them does not leave the
        # remaining files behind. Cleanup is best-effort: failures are only
        # logged, and never change the outcome of the job.
        for path in (job.submit_script_path, job.stdout_path, job.stderr_path):
            try:
                self.sftp_client.unlink(path)
            except SSH_RETRY_EXCEPTIONS + (IOError,) as e:
                job.logger.warning('Could not remove file %s: %s', path, e)
Example #3
0
 def test_finish_job_ignore(self):
     """!
     @brief Test that finish_job() accepts a failed job without raising.

     The job is built from a mocked resource with a file:// URL and is marked
     as FAILED before it is handed to the adapter.
     """
     self.create_adapter()
     input_resource = mock.MagicMock(url='file:///path/to/foo.bar')
     failed_job = self.create_job(input_resource)
     failed_job.set_status(eva.job.FAILED)
     self.adapter.finish_job(failed_job)
Example #4
0
 def test_generate_resources(self):
     """!
     @brief Test that the adapter generates correct resources for the job output.

     A completed job must yield exactly one data instance with the expected
     URL, and no product instance or data resources.
     """
     self.create_adapter()
     input_resource = mock.MagicMock(url='file:///path/to/foo.bar')
     job = self.create_job(input_resource)
     job.set_status(eva.job.COMPLETE)
     generated = self.adapter.default_resource_dictionary()
     self.adapter.generate_resources(job, generated)
     expected_counts = (('productinstance', 0), ('data', 0), ('datainstance', 1))
     for kind, count in expected_counts:
         self.assertEqual(len(generated[kind]), count)
     datainstance_args = generated['datainstance'][0].args[0]
     self.assertEqual(datainstance_args['url'], 'http://bar/baz/foo.bar')
Example #5
0
 def test_finish_job_and_generate_resources(self):
     """!
     @brief Test that a completed job can be finished without exceptions, and
     that one resource of each kind is generated, with the data instance
     pointing at the adapter's output service backend.
     """
     self.create_adapter()
     input_resource = mock.MagicMock()
     with httmock.HTTMock(*eva.tests.schemas.SCHEMAS):
         job = self.create_job(input_resource)
     job.set_status(eva.job.COMPLETE)
     self.adapter.finish_job(job)
     generated = self.generate_resources(job)
     for kind in ("productinstance", "data", "datainstance"):
         self.assertEqual(len(generated[kind]), 1)
     backend = generated["datainstance"][0].args[0]["servicebackend"]
     self.assertEqual(backend, self.adapter.output_service_backend)
Example #6
0
 def test_finish_job_and_generate_resources(self):
     """!
     @brief Test that finishing a completed job does not throw, and that the
     first two generated data instance resources have their deleted flag set.
     """
     self.create_adapter()
     input_resource = mock.MagicMock()
     self.adapter.api = mock.MagicMock()
     with httmock.HTTMock(*eva.tests.schemas.SCHEMAS):
         job = self.create_job(input_resource)
     job.set_status(eva.job.COMPLETE)
     self.adapter.finish_job(job)
     # Two mocked instances are attached to the job after it has finished.
     job.instance_list = [mock.MagicMock(), mock.MagicMock()]
     with httmock.HTTMock(*eva.tests.schemas.SCHEMAS):
         generated = self.generate_resources(job)
     for index in (0, 1):
         self.assertTrue(generated['datainstance'][index].deleted)
Example #7
0
 def test_finish_job_and_generate_resources(self):
     """!
     @brief Test that job finish works without exceptions, and that the URL
     of the generated data instance contains the formatted reference time of
     the input resource.
     """
     self.create_adapter()
     reference_time = eva.coerce_to_utc(datetime.datetime(2016, 1, 1, 18, 15, 0))
     input_resource = mock.MagicMock()
     input_resource.url = 'file:///foo/bar/baz'
     input_resource.data.productinstance.reference_time = reference_time
     with httmock.HTTMock(*eva.tests.schemas.SCHEMAS):
         job = self.create_job(input_resource)
         job.set_status(eva.job.COMPLETE)
         self.adapter.finish_job(job)
         generated = self.generate_resources(job)
     for kind in ('productinstance', 'data', 'datainstance'):
         self.assertEqual(len(generated[kind]), 1)
     output_url = generated['datainstance'][0].url
     self.assertEqual(output_url, 'file:///out/20160101T181500Z')
Example #8
0
 def test_generate_resources(self):
     """!
     @brief Test that the md5sum found in the job's stdout is stored as the
     hash of the single generated data instance resource, and that no product
     instance or data resources are generated.
     """
     self.create_adapter()
     job = self.create_job(mock.MagicMock())
     job.set_status(eva.job.COMPLETE)
     self.adapter.finish_job(job)
     expected_hash = '401b30e3b8b5d629635a5c613cdb7919'
     # stdout is set after finish_job(), right before resources are generated.
     job.stdout = expected_hash
     generated = self.generate_resources(job)
     self.assertEqual(generated['datainstance'][0].hash_type, str('md5'))
     self.assertEqual(generated['datainstance'][0].hash, expected_hash)
     expected_counts = (('productinstance', 0), ('data', 0), ('datainstance', 1))
     for kind, count in expected_counts:
         self.assertEqual(len(generated[kind]), count)
Example #9
0
 def test_finish_job_and_generate_resources(self):
     """!
     @brief Test that job finish works without exceptions when
     EVA_PRODUCTSTATUS_API_KEY is removed from the environment, and that a
     single data instance resource with the expected URL is generated.
     """
     del self.env['EVA_PRODUCTSTATUS_API_KEY']
     self.create_adapter()
     input_resource = mock.MagicMock()
     input_resource.url = 'file:///foo/bar/baz'
     self.adapter.api = mock.MagicMock()
     job = self.create_job(input_resource)
     job.service_backend = 'foo'
     job.set_status(eva.job.COMPLETE)
     self.adapter.finish_job(job)
     generated = self.generate_resources(job)
     expected_counts = (('productinstance', 0), ('data', 0), ('datainstance', 1))
     for kind, count in expected_counts:
         self.assertEqual(len(generated[kind]), count)
     output_url = generated['datainstance'][0].url
     self.assertEqual(output_url, 'file:///foo/baz')
Example #10
0
    def test_generate_resources(self):
        """!
        @brief Test resource generation from file recognition output.

        The job's stdout lists a .nc file together with a list of time steps,
        and a .nml file without any. One product instance and two data/data
        instance pairs are expected: the first pair describes the .nc file
        with a time period spanning the listed time steps, the second one
        describes the .nml file with no time period.
        """
        self.create_adapter()

        # Replace the adapter's output configuration with mocks, so that the
        # generated resources can be compared against them directly.
        adapter = self.adapter
        adapter.output_product = mock.MagicMock()
        adapter.output_data_format = mock.MagicMock()
        adapter.output_service_backend = mock.MagicMock()
        adapter.nml_data_format = mock.MagicMock()

        period_begin = eva.coerce_to_utc(datetime.datetime(2016, 6, 6, 12))
        period_end = eva.coerce_to_utc(datetime.datetime(2016, 6, 7))

        job = self.create_job(mock.MagicMock())
        job.stdout = [
            '/tmp/meteo20160606_00.nc  time = "2016-06-06 12", "2016-06-06 15", "2016-06-06 18", "2016-06-06 21", "2016-06-07" ;',
            '/tmp/meteo20160606_00.nml',
        ]
        job.output_files = adapter.parse_file_recognition_output(job.stdout)
        job.resource.data.productinstance.reference_time = period_begin

        job.set_status(eva.job.COMPLETE)
        adapter.finish_job(job)

        with httmock.HTTMock(*eva.tests.schemas.SCHEMAS):
            generated = self.generate_resources(job)

        # Product instance and the NetCDF file
        product_args = generated['productinstance'][0].args[0]
        self.assertEqual(product_args['product'], adapter.output_product)
        self.assertEqual(generated['datainstance'][0].url, 'file:///tmp/meteo20160606_00.nc')
        self.assertEqual(generated['datainstance'][0].format, adapter.output_data_format)
        self.assertEqual(generated['data'][0].args[0]['time_period_begin'], period_begin)
        self.assertEqual(generated['data'][0].args[0]['time_period_end'], period_end)

        # The .nml file, which carries no time information
        self.assertEqual(generated['datainstance'][1].url, 'file:///tmp/meteo20160606_00.nml')
        self.assertEqual(generated['datainstance'][1].format, adapter.nml_data_format)
        self.assertEqual(generated['data'][1].args[0]['time_period_begin'], None)
        self.assertEqual(generated['data'][1].args[0]['time_period_end'], None)

        # Total number of generated resources
        expected_counts = (('productinstance', 1), ('data', 2), ('datainstance', 2))
        for kind, count in expected_counts:
            self.assertEqual(len(generated[kind]), count)
Example #11
0
File: null.py Project: metno/EVA
 def sync(self, job):
     """!
     @brief Mark the job as completed without doing any work.

     Sets the job status to COMPLETE, the exit code to zero, and stdout and
     stderr to empty lists.
     """
     job.set_status(eva.job.COMPLETE)
     job.exit_code = 0
     job.stdout, job.stderr = [], []
Example #12
0
File: null.py Project: metno/EVA
 def execute_async(self, job):
     """!
     @brief Pretend to start the job: nothing is executed, the job is only
     moved through the INITIALIZED and STARTED states.
     """
     job.logger.info("Faking job execution and setting exit code to zero.")
     for status in (eva.job.INITIALIZED, eva.job.STARTED):
         job.set_status(status)
Example #13
0
    def execute_async(self, job):
        """!
        @brief Execute a job on Grid Engine.

        Queries qstat for a task named after this job. If one is found, its
        id is stored in `job.pid`, the job is marked as STARTED, and nothing
        is submitted. Otherwise the job command is written to a submit script
        over SFTP and handed to qsub; on success the job is marked as STARTED
        and its next poll time is scheduled. In both cases the stdout, stderr
        and submit script paths are stored on the job.

        @param job The job to execute.
        @throws eva.exceptions.RetryException if an SSH operation fails with
                one of SSH_RETRY_EXCEPTIONS, or if qsub returns a non-zero
                exit code.
        """

        # Create SSH connection
        try:
            self.ensure_ssh_connection(job)
        except SSH_RETRY_EXCEPTIONS as e:
            raise eva.exceptions.RetryException(e)

        # Check whether a GridEngine task is already running for this job. If
        # it is, we skip submitting the job and jump right to the qacct polling.
        job.logger.info('Querying if job is already running.')
        job_id = create_job_unique_id(self.group_id, job.id)
        qstat_command = 'qstat -j %s' % job_id
        already_running = False
        try:
            exit_code, stdout, stderr = self.execute_ssh_command(qstat_command)
            if exit_code != 0:
                job.logger.info('Job is not running, continuing with submission.')
            else:
                job.pid = get_job_id_from_qstat_output(stdout)
                job.logger.warning('Job is already running with JOB_ID %d, will not submit a new job.', job.pid)
                job.set_status(eva.job.STARTED)
                already_running = True
        except SSH_RETRY_EXCEPTIONS as e:
            raise eva.exceptions.RetryException(e)

        # Generate paths; these are set even when the submission is skipped.
        job.stdout_path = self.create_job_filename(job_id, 'stdout')
        job.stderr_path = self.create_job_filename(job_id, 'stderr')
        job.submit_script_path = self.create_job_filename(job_id, 'sh')

        # Nothing more to do if the job already exists
        if already_running:
            return

        # Create a submit script
        try:
            with self.sftp_client.open(job.submit_script_path, 'w') as script_file:
                script_file.write(job.command)
        except SSH_RETRY_EXCEPTIONS as e:
            raise eva.exceptions.RetryException(e)

        # Print the job script to the log
        eva.executor.log_job_script(job)

        # Assemble the qsub command line
        qsub_args = [
            'qsub',
            '-N', job_id,
            '-b', 'n',
            '-sync', 'n',
            '-o', job.stdout_path,
            '-e', job.stderr_path,
        ]

        # Run jobs in a specified queue
        queue = self.env['EVA_GRIDENGINE_QUEUE']
        if queue:
            qsub_args.extend(['-q', queue])

        qsub_args.append(job.submit_script_path)
        qsub_command = ' '.join(qsub_args)
        job.logger.info('Submitting job to GridEngine: %s', qsub_command)

        # Execute command asynchronously
        try:
            exit_code, stdout, stderr = self.execute_ssh_command(qsub_command)
            if exit_code != EXIT_OK:
                raise eva.exceptions.RetryException(
                    'Failed to submit the job to GridEngine, exit code %d' %
                    exit_code
                )
            first_line = eva.executor.get_std_lines(stdout)[0]
            job.pid = get_job_id_from_qsub_output(first_line)
            job.logger.info('Job has been submitted, JOB_ID = %d', job.pid)
            job.set_status(eva.job.STARTED)
            job.set_next_poll_time(QACCT_CHECK_INTERVAL_MSECS)
        except SSH_RETRY_EXCEPTIONS as e:
            raise eva.exceptions.RetryException(e)