Example no. 1
    def cleanup_job_execution(self, job_exe):
        """See :meth:`job.execution.job_exe_cleaner.JobExecutionCleaner.cleanup_job_execution`
        """

        logger.info('Cleaning up a non-system job')

        download_dir = get_job_exe_input_data_dir(job_exe.id)
        download_work_dir = get_job_exe_input_work_dir(job_exe.id)
        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe.id)

        logger.info('Cleaning up download directory')
        ScaleFile.objects.cleanup_download_dir(download_dir, download_work_dir)

        logger.info('Cleaning up upload directories')
        workspace_ids = job_exe.job.get_job_data().get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            logger.info('Cleaning up upload directory for workspace %s',
                        workspace.name)
            ScaleFile.objects.cleanup_upload_dir(upload_dir, upload_work_dir,
                                                 workspace)

        move_work_dir = os.path.join(upload_work_dir,
                                     'move_source_file_in_workspace')
        if os.path.exists(move_work_dir):
            logger.info(
                'Cleaning up work directory for moving parsed source files')
            ScaleFile.objects.cleanup_move_dir(move_work_dir)
            logger.info('Deleting %s', move_work_dir)
            os.rmdir(move_work_dir)

        delete_normal_job_exe_dir_tree(job_exe.id)
Example no. 2
    def test_successful(self, mock_file_call, mock_file_list_call, mock_store,
                        mock_isfile):
        '''Tests calling JobData.store_output_data_files() successfully'''
        def new_isfile(path):
            return True

        mock_isfile.side_effect = new_isfile

        job_exe = MagicMock()
        job_exe.id = 1
        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        data = {
            u'output_data': [{
                u'name': u'Param1',
                u'workspace_id': 1
            }, {
                u'name': u'Param2',
                u'workspace_id': 2
            }]
        }
        file_path_1 = os.path.join(upload_dir, u'path', u'1', u'my_file.txt')
        file_path_2 = os.path.join(upload_dir, u'path', u'2', u'my_file_2.txt')
        file_path_3 = os.path.join(upload_dir, u'path', u'3', u'my_file_3.txt')
        data_files = {
            u'Param1': (file_path_1, None),
            u'Param2': [(file_path_2, u'text/plain'), (file_path_3, None)]
        }

        JobData(data).store_output_data_files(data_files, job_exe)
        mock_file_call.assert_called_once_with(u'Param1', long(1))
        self.assertEqual(u'Param2', mock_file_list_call.call_args[0][0])
        self.assertSetEqual(set([long(3), long(2)]),
                            set(mock_file_list_call.call_args[0][1]))
Example no. 3
    def setup_job_dir(self, data_files, job_exe_id):
        '''Sets up the directory structure for a job execution and downloads the given files

        :param data_files: Dict with each file parameter name mapping to a bool indicating if the parameter accepts
            multiple files (True) and a relative directory path
        :type data_files: dict of str -> tuple(bool, str)
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        :returns: Dict with each file parameter name mapping to a list of absolute file paths of the written files
        :rtype: dict of str -> list of str
        '''

        download_dir = get_job_exe_input_data_dir(job_exe_id)
        download_work_dir = get_job_exe_input_work_dir(job_exe_id)
        upload_dir = get_job_exe_output_data_dir(job_exe_id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe_id)

        # Download the job execution input files
        self.retrieve_input_data_files(download_dir, download_work_dir,
                                       data_files)

        # Set up upload directories for output workspace
        workspace_ids = self.get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            ScaleFile.objects.setup_upload_dir(upload_dir, upload_work_dir,
                                               workspace)
Example no. 4
    def cleanup_job_execution(self, job_exe):
        """See :meth:`job.execution.job_exe_cleaner.JobExecutionCleaner.cleanup_job_execution`
        """

        logger.info('Cleaning up a non-system job')

        download_dir = get_job_exe_input_data_dir(job_exe.id)
        download_work_dir = get_job_exe_input_work_dir(job_exe.id)
        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe.id)

        logger.info('Cleaning up download directory')
        ScaleFile.objects.cleanup_download_dir(download_dir, download_work_dir)

        logger.info('Cleaning up upload directories')
        workspace_ids = job_exe.job.get_job_data().get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            logger.info('Cleaning up upload directory for workspace %s', workspace.name)
            ScaleFile.objects.cleanup_upload_dir(upload_dir, upload_work_dir, workspace)

        move_work_dir = os.path.join(upload_work_dir, 'move_source_file_in_workspace')
        if os.path.exists(move_work_dir):
            logger.info('Cleaning up work directory for moving parsed source files')
            ScaleFile.objects.cleanup_move_dir(move_work_dir)
            logger.info('Deleting %s', move_work_dir)
            os.rmdir(move_work_dir)

        delete_normal_job_exe_dir_tree(job_exe.id)
Example no. 5
    def test_file_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_get_one_file, mock_setup_upload):
        job_work_dir = "/test"
        job_exe_id = 1
        job_input_dir = get_job_exe_input_data_dir(job_exe_id)
        job_output_dir = os.path.join(job_work_dir, u'outputs')

        def new_retrieve(arg1, arg2, arg3):
            return {u'file1_out': [input_file_path]}

        input_file_path = os.path.join(job_input_dir, 'file1', 'foo.txt')
        mock_retrieve_call.side_effect = new_retrieve
        mock_get_one_file.side_effect = lambda arg1: input_file_path
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict[u'command_arguments'] = u'${file1}'
        job_interface_dict[u'input_data'] = [{u'name': u'file1', u'type': u'file', 'required': True}]
        job_data_dict[u'input_data'].append({u'name': u'file1', u'file_id': 1})
        job_data_dict[u'output_data'].append({u'name': u'file1_out', u'workspace_id': self.workspace.id})

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)

        job_interface.perform_pre_steps(job_data, job_environment, job_exe_id)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, input_file_path, u'expected a different command from pre_steps')
        mock_setup_upload.assert_called_once_with(get_job_exe_output_data_dir(job_exe_id), get_job_exe_output_work_dir(job_exe_id), self.workspace)
Example no. 6
    def setup_job_dir(self, data_files, job_exe_id):
        """Sets up the directory structure for a job execution and downloads the given files

        :param data_files: Dict with each file parameter name mapping to a bool indicating if the parameter accepts
            multiple files (True) and a relative directory path
        :type data_files: dict of str -> tuple(bool, str)
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        :returns: Dict with each file parameter name mapping to a list of absolute file paths of the written files
        :rtype: dict of str -> list of str
        """

        download_dir = get_job_exe_input_data_dir(job_exe_id)
        download_work_dir = get_job_exe_input_work_dir(job_exe_id)
        upload_dir = get_job_exe_output_data_dir(job_exe_id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe_id)

        # Download the job execution input files
        self.retrieve_input_data_files(download_dir, download_work_dir, data_files)

        # Set up upload directories for output workspace
        workspace_ids = self.get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            ScaleFile.objects.setup_upload_dir(upload_dir, upload_work_dir, workspace)

        # If the upload dir did not get created (e.g. no output files), make sure it gets created for results manifests
        if not os.path.exists(upload_dir):
            logger.info("Creating %s", upload_dir)
            os.makedirs(upload_dir, mode=0755)
Example no. 7
    def handle(self, **options):
        '''See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the command.
        '''
        exe_id = options.get('job_exe_id')

        logger.info('Command starting: scale_pre_steps - Job Execution ID: %i', exe_id)
        try:
            node_work_dir = settings.NODE_WORK_DIR

            job_exe = JobExecution.objects.get_job_exe_with_job_and_job_type(exe_id)

            job_dir = file_system.get_job_exe_dir(exe_id, node_work_dir)
            input_dir = file_system.get_job_exe_input_dir(exe_id, node_work_dir)
            output_dir = file_system.get_job_exe_output_dir(exe_id, node_work_dir)
            job_dirs = [job_dir, input_dir, output_dir]
            for target_dir in job_dirs:
                self._create_job_dir(exe_id, target_dir)

            job_interface = job_exe.get_job_interface()
            job_data = job_exe.job.get_job_data()
            job_environment = job_exe.get_job_environment()
            job_interface.perform_pre_steps(job_data, job_environment, exe_id)
            command_args = job_interface.fully_populate_command_argument(job_data, job_environment, exe_id)

            # This shouldn't be necessary once we have user namespaces in docker
            self._chmod_job_dir(file_system.get_job_exe_output_data_dir(exe_id))

            # Perform a force pull for docker jobs to get the latest version of the image before running
            # TODO: Remove this hack in favor of the feature in Mesos 0.22.x, see MESOS-1886 for details
            docker_image = job_exe.job.job_type.docker_image
            if docker_image:
                logger.info('Pulling latest docker image: %s', docker_image)
                try:
                    subprocess.check_call(['sudo', 'docker', 'pull', docker_image])
                except subprocess.CalledProcessError:
                    logger.exception('Docker pull returned unexpected exit code.')
                except OSError:
                    logger.exception('OS unable to run docker pull command.')

            logger.info('Executing job: %i -> %s', exe_id, ' '.join(command_args))
            JobExecution.objects.pre_steps_command_arguments(exe_id, command_args)
        except Exception as e:
            logger.exception('Job Execution %i: Error performing pre-job steps', exe_id)

            exit_code = -1
            if isinstance(e, DatabaseError):
                exit_code = DB_EXIT_CODE
            elif isinstance(e, NfsError):
                exit_code = NFS_EXIT_CODE
            elif isinstance(e, IOError):
                exit_code = IO_EXIT_CODE
            sys.exit(exit_code)
        logger.info('Command completed: scale_pre_steps')
Example no. 8
    def test_output_dir_in_command(self):
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${job_output_dir}'

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)
        job_exe_id = 1
        job_output_dir = file_system.get_job_exe_output_data_dir(job_exe_id)

        job_interface.perform_pre_steps(job_data, job_environment, 1)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, job_output_dir, 'expected a different command from pre_steps')
Example no. 9
    def handle(self, **options):
        '''See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the command.
        '''
        job_exe_id = options.get('job_exe_id')

        logger.info('Command starting: scale_pre_steps - Job Execution ID: %i', job_exe_id)
        try:
            job_exe = self._get_job_exe(job_exe_id)

            file_system.create_job_exe_dir(job_exe_id)
            file_system.create_normal_job_exe_dir_tree(job_exe_id)

            job_interface = job_exe.get_job_interface()
            job_data = job_exe.job.get_job_data()
            job_environment = job_exe.get_job_environment()
            job_interface.perform_pre_steps(job_data, job_environment, job_exe_id)
            command_args = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)

            # This shouldn't be necessary once we have user namespaces in docker
            self._chmod_job_dir(file_system.get_job_exe_output_data_dir(job_exe_id))

            # Perform a force pull for docker jobs to get the latest version of the image before running
            # TODO: Remove this hack in favor of the feature in Mesos 0.22.x, see MESOS-1886 for details
            docker_image = job_exe.job.job_type.docker_image
            if docker_image:
                logger.info('Pulling latest docker image: %s', docker_image)
                try:
                    subprocess.check_call(['sudo', 'docker', 'pull', docker_image])
                except subprocess.CalledProcessError:
                    logger.exception('Docker pull returned unexpected exit code.')
                except OSError:
                    logger.exception('OS unable to run docker pull command.')

            logger.info('Executing job: %i -> %s', job_exe_id, ' '.join(command_args))
            self._populate_command_arguments(job_exe_id, command_args)
        except Exception as ex:
            logger.exception('Job Execution %i: Error performing pre-job steps', job_exe_id)

            exit_code = -1
            if isinstance(ex, DatabaseError):
                exit_code = DB_EXIT_CODE
            elif isinstance(ex, NfsError):
                exit_code = NFS_EXIT_CODE
            elif isinstance(ex, IOError):
                exit_code = IO_EXIT_CODE
            sys.exit(exit_code)
        logger.info('Command completed: scale_pre_steps')
Example no. 10
    def test_output_dir_in_command(self):
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${job_output_dir}'

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)
        job_exe_id = 1
        job_output_dir = file_system.get_job_exe_output_data_dir(job_exe_id)

        job_interface.perform_pre_steps(job_data, job_environment, 1)
        job_command_arguments = job_interface.fully_populate_command_argument(
            job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, job_output_dir,
                         'expected a different command from pre_steps')
Example no. 11
    def test_file_in_command(self, mock_retrieve_call, mock_os_mkdir,
                             mock_get_one_file, mock_setup_upload):
        job_exe_id = 1
        job_input_dir = file_system.get_job_exe_input_data_dir(job_exe_id)

        def new_retrieve(arg1, arg2, arg3):
            return {
                'file1_out': [input_file_path],
            }

        input_file_path = os.path.join(job_input_dir, 'file1', 'foo.txt')
        mock_retrieve_call.side_effect = new_retrieve
        mock_get_one_file.side_effect = lambda arg1: input_file_path
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${file1}'
        job_interface_dict['input_data'] = [{
            'name': 'file1',
            'type': 'file',
            'required': True,
        }]
        job_data_dict['input_data'].append({
            'name': 'file1',
            'file_id': self.file.id,
        })
        job_data_dict['output_data'].append({
            'name': 'file1_out',
            'workspace_id': self.workspace.id,
        })

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)

        job_interface.perform_pre_steps(job_data, job_environment, job_exe_id)
        job_command_arguments = job_interface.fully_populate_command_argument(
            job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, input_file_path,
                         'expected a different command from pre_steps')
        mock_setup_upload.assert_called_once_with(
            file_system.get_job_exe_output_data_dir(job_exe_id),
            file_system.get_job_exe_output_work_dir(job_exe_id),
            self.workspace)
Example no. 12
    def test_successful(self, mock_file_call, mock_file_list_call, mock_store, mock_isfile):
        '''Tests calling JobData.store_output_data_files() successfully'''

        def new_isfile(path):
            return True
        mock_isfile.side_effect = new_isfile

        job_exe = MagicMock()
        job_exe.id = 1
        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        data = {u'output_data': [{u'name': u'Param1', u'workspace_id': 1},
                                 {u'name': u'Param2', u'workspace_id': 2}]}
        file_path_1 = os.path.join(upload_dir, u'path', u'1', u'my_file.txt')
        file_path_2 = os.path.join(upload_dir, u'path', u'2', u'my_file_2.txt')
        file_path_3 = os.path.join(upload_dir, u'path', u'3', u'my_file_3.txt')
        data_files = {u'Param1': (file_path_1, None), u'Param2': [(file_path_2, u'text/plain'), (file_path_3, None)]}

        JobData(data).store_output_data_files(data_files, job_exe)
        mock_file_call.assert_called_once_with(u'Param1', long(1))
        self.assertEqual(u'Param2', mock_file_list_call.call_args[0][0])
        self.assertSetEqual(set([long(3), long(2)]), set(mock_file_list_call.call_args[0][1]))
Example no. 13
    def cleanup_job_execution(self, job_exe):
        '''See :meth:`job.execution.job_exe_cleaner.JobExecutionCleaner.cleanup_job_execution`
        '''

        logger.info('Cleaning up a non-system job')

        download_dir = get_job_exe_input_data_dir(job_exe.id)
        download_work_dir = get_job_exe_input_work_dir(job_exe.id)
        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe.id)

        logger.info('Cleaning up download directory')
        ScaleFile.objects.cleanup_download_dir(download_dir, download_work_dir)

        logger.info('Cleaning up upload directories')
        workspace_ids = job_exe.job.get_job_data().get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            logger.info('Cleaning up upload directory for workspace %s', workspace.name)
            ScaleFile.objects.cleanup_upload_dir(upload_dir, upload_work_dir, workspace)

        save_job_exe_metrics(job_exe)
Example no. 14
    def fully_populate_command_argument(self, job_data, job_environment,
                                        job_exe_id):
        '''Returns a fully populated command arguments string. If pre-steps are necessary
        (see are_pre_steps_needed), they should be run before this. The string is populated with
        information from the job_data, job_environment, job_input_dir, and job_output_dir: the
        properties and input files come from the job_data, the shared resources from the
        job_environment, and the ${...} input and ${job_output_dir} paths come from the work directories.
        Throws a :class:`job.configuration.interface.exceptions.InvalidEnvironment` if the necessary
        pre-steps have not been performed.

        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param job_environment: The job environment
        :type job_environment: :class:`job.configuration.environment.job_environment.JobEnvironment`
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        :returns: The fully populated command arguments string
        :rtype: str
        '''
        # TODO: don't ignore job_environment
        command_arguments = self.populate_command_argument_properties(job_data)

        job_input_dir = get_job_exe_input_data_dir(job_exe_id)
        job_output_dir = get_job_exe_output_data_dir(job_exe_id)

        for input_data in self.definition['input_data']:
            input_name = input_data['name']
            input_type = input_data['type']
            if input_type == 'file':
                param_dir = os.path.join(job_input_dir, input_name)
                file_path = self._get_one_file_from_directory(param_dir)
                command_arguments = self._replace_command_parameter(
                    command_arguments, input_name, file_path)
            elif input_type == 'files':
                # TODO: verify folder exists
                param_dir = os.path.join(job_input_dir, input_name)
                command_arguments = self._replace_command_parameter(
                    command_arguments, input_name, param_dir)

        command_arguments = self._replace_command_parameter(
            command_arguments, 'job_output_dir', job_output_dir)
        return command_arguments
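
A minimal, self-contained sketch of the substitution this method performs, assuming a single 'file1' input and the command string '${file1} ${job_output_dir}'; plain str.replace stands in for the internal _replace_command_parameter helper, and the directory paths are illustrative:

    import os

    command_arguments = u'${file1} ${job_output_dir}'
    job_input_dir = u'/scale/input/123'    # stands in for get_job_exe_input_data_dir(123)
    job_output_dir = u'/scale/output/123'  # stands in for get_job_exe_output_data_dir(123)

    # A 'file' input is replaced with the single file found under <job_input_dir>/<input_name>
    file_path = os.path.join(job_input_dir, u'file1', u'foo.txt')
    command_arguments = command_arguments.replace(u'${file1}', file_path)

    # The job output directory parameter is substituted last
    command_arguments = command_arguments.replace(u'${job_output_dir}', job_output_dir)
    # command_arguments is now u'/scale/input/123/file1/foo.txt /scale/output/123'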
Example no. 15
    def setup_job_dir(self, data_files, job_exe_id):
        '''Sets up the directory structure for a job execution and downloads the given files

        :param data_files: Dict with each file parameter name mapping to a bool indicating if the parameter accepts
            multiple files (True) and a relative directory path
        :type data_files: dict of str -> tuple(bool, str)
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        :returns: Dict with each file parameter name mapping to a list of absolute file paths of the written files
        :rtype: dict of str -> list of str
        '''

        download_dir = get_job_exe_input_data_dir(job_exe_id)
        download_work_dir = get_job_exe_input_work_dir(job_exe_id)
        upload_dir = get_job_exe_output_data_dir(job_exe_id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe_id)

        # Download the job execution input files
        self.retrieve_input_data_files(download_dir, download_work_dir, data_files)

        # Set up upload directories for output workspace
        workspace_ids = self.get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            ScaleFile.objects.setup_upload_dir(upload_dir, upload_work_dir, workspace)
Example no. 16
    def fully_populate_command_argument(self, job_data, job_environment, job_exe_id):
        '''Returns a fully populated command arguments string. If pre-steps are necessary
        (see are_pre_steps_needed), they should be run before this. The string is populated with
        information from the job_data, job_environment, job_input_dir, and job_output_dir: the
        properties and input files come from the job_data, the shared resources from the
        job_environment, and the ${...} input and ${job_output_dir} paths come from the work directories.
        Throws a :class:`job.configuration.interface.exceptions.InvalidEnvironment` if the necessary
        pre-steps have not been performed.

        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param job_environment: The job environment
        :type job_environment: :class:`job.configuration.environment.job_environment.JobEnvironment`
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        :returns: The fully populated command arguments string
        :rtype: str
        '''
        # TODO: don't ignore job_environment
        command_arguments = self.populate_command_argument_properties(job_data)

        job_input_dir = get_job_exe_input_data_dir(job_exe_id)
        job_output_dir = get_job_exe_output_data_dir(job_exe_id)

        for input_data in self.definition['input_data']:
            input_name = input_data['name']
            input_type = input_data['type']
            if input_type == 'file':
                param_dir = os.path.join(job_input_dir, input_name)
                file_path = self._get_one_file_from_directory(param_dir)
                command_arguments = self._replace_command_parameter(command_arguments, input_name, file_path)
            elif input_type == 'files':
                # TODO: verify folder exists
                param_dir = os.path.join(job_input_dir, input_name)
                command_arguments = self._replace_command_parameter(command_arguments, input_name, param_dir)

        command_arguments = self._replace_command_parameter(command_arguments, 'job_output_dir', job_output_dir)
        return command_arguments
Example no. 17
    def test_files_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_setup_upload):
        def new_retrieve(arg1, arg2, arg3):
            return {
                'files1_out': ['/test/file1/foo.txt', '/test/file1/bar.txt'],
            }

        mock_retrieve_call.side_effect = new_retrieve
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${files1}'
        job_interface_dict['input_data'] = [{
            'name': 'files1',
            'type': 'files',
            'required': True,
        }]
        job_data_dict['input_data'].append({
            'name': 'files1',
            'file_ids': [1, 2, 3],
        })
        job_data_dict['output_data'].append({
            'name': 'files1_out',
            'workspace_id': self.workspace.id,
        })

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)
        job_exe_id = 1
        job_input_dir = file_system.get_job_exe_input_data_dir(job_exe_id)

        job_interface.perform_pre_steps(job_data, job_environment, 1)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        expected_command_arguments = os.path.join(job_input_dir, 'files1')
        self.assertEqual(job_command_arguments, expected_command_arguments,
                         'expected a different command from pre_steps')
        mock_setup_upload.assert_called_once_with(file_system.get_job_exe_output_data_dir(job_exe_id),
                                                  file_system.get_job_exe_output_work_dir(job_exe_id), self.workspace)
Example no. 18
    def perform_post_steps(self, job_exe, job_data, stdoutAndStderr):
        '''Stores the files and deletes any working directories

        :param job_exe: The job execution model with related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param stdoutAndStderr: the combined standard output and error output from the job execution
        :type stdoutAndStderr: str
        :return: A tuple of the job results and the results manifest generated by the job execution
        :rtype: (:class:`job.configuration.results.job_results.JobResults`,
            :class:`job.configuration.results.results_manifest.results_manifest.ResultsManifest`)
        '''

        manifest_data = {}
        job_output_dir = get_job_exe_output_data_dir(job_exe.id)
        path_to_manifest_file = os.path.join(job_output_dir, 'results_manifest.json')
        if os.path.exists(path_to_manifest_file):
            logger.info('Opening results manifest...')
            with open(path_to_manifest_file, 'r') as manifest_file:
                manifest_data = json.loads(manifest_file.read())
                logger.info('Results manifest:')
                logger.info(manifest_data)
        else:
            logger.info('No results manifest found')

        results_manifest = ResultsManifest(manifest_data)
        stdout_files = self._get_artifacts_from_stdout(stdoutAndStderr)
        results_manifest.add_files(stdout_files)

        results_manifest.validate(self._output_file_manifest_dict)

        files_to_store = {}
        for manifest_file_entry in results_manifest.get_files():
            param_name = manifest_file_entry['name']

            media_type = None
            output_data_item = self._get_output_data_item_by_name(param_name)
            if output_data_item:
                media_type = output_data_item.get('media_type')

            if 'file' in manifest_file_entry:
                file_entry = manifest_file_entry['file']
                if 'geo_metadata' in file_entry:
                    files_to_store[param_name] = (file_entry['path'], media_type, file_entry['geo_metadata'])
                else:
                    files_to_store[param_name] = (file_entry['path'], media_type)
            elif 'files' in manifest_file_entry:
                file_tuples = []
                for file_entry in manifest_file_entry['files']:
                    if 'geo_metadata' in file_entry:
                        file_tuples.append((file_entry['path'], media_type, file_entry['geo_metadata']))
                    else:
                        file_tuples.append((file_entry['path'], media_type))
                files_to_store[param_name] = file_tuples

        job_data_parse_results = {}  # parse results formatted for job_data
        for parse_result in results_manifest.get_parse_results():
            filename = parse_result['filename']
            assert filename not in job_data_parse_results
            geo_metadata = parse_result.get('geo_metadata', {})
            geo_json = geo_metadata.get('geo_json', None)
            data_started = geo_metadata.get('data_started', None)
            data_ended = geo_metadata.get('data_ended', None)
            data_types = parse_result.get('data_types', [])
            new_workspace_path = parse_result.get('new_workspace_path', None)
            work_dir = None
            if new_workspace_path:
                new_workspace_path = os.path.join(new_workspace_path, filename)
                work_dir = os.path.join(get_job_exe_output_work_dir(job_exe.id), 'move_source_file_in_workspace')
            job_data_parse_results[filename] = (geo_json, data_started, data_ended, data_types, new_workspace_path,
                                                work_dir)

        job_data.save_parse_results(job_data_parse_results)
        return (job_data.store_output_data_files(files_to_store, job_exe), results_manifest)
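
Based on the fields read above, the manifest_data loaded from results_manifest.json looks roughly like the following Python structure; the top-level key names and all values are assumptions inferred from the accessors (get_files(), get_parse_results()), not a definitive schema:

    # Illustrative manifest_data; every key and value here is an assumption
    manifest_data = {
        u'files': [
            # single-file output parameter, optionally carrying geo_metadata
            {u'name': u'param_a',
             u'file': {u'path': u'outputs/a.tif',
                       u'geo_metadata': {u'data_started': u'2015-01-01T00:00:00Z',
                                         u'data_ended': u'2015-01-01T01:00:00Z'}}},
            # multiple-file output parameter
            {u'name': u'param_b',
             u'files': [{u'path': u'outputs/b1.txt'}, {u'path': u'outputs/b2.txt'}]},
        ],
        u'parse_results': [
            {u'filename': u'input.h5',
             u'geo_metadata': {u'geo_json': {}, u'data_started': u'2015-01-01T00:00:00Z',
                               u'data_ended': u'2015-01-01T01:00:00Z'},
             u'data_types': [u'radar'],
             u'new_workspace_path': u'moved/parsed'},
        ],
    }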
Example no. 19
    def store_output_data_files(self, data_files, job_exe):
        '''Stores the given data output files

        :param data_files: Dict with each file parameter name mapping to a tuple of absolute local file path and media
            type (media type is optionally None) for a single file parameter and a list of tuples for a multiple file
            parameter
        :type data_files: dict of str -> tuple(str, str) or list of tuple(str, str)
        :param job_exe: The job execution model (with related job and job_type fields) that is storing the output data
            files
        :type job_exe: :class:`job.models.JobExecution`
        :returns: The job results
        :rtype: :class:`job.configuration.results.job_results.JobResults`
        '''

        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        work_dir = get_job_exe_output_work_dir(job_exe.id)

        # Organize the data files
        workspace_files = {}  # Workspace ID -> list of (absolute local file path, media type)
        params_by_file_path = {}  # Absolute local file path -> output parameter name
        for name in data_files:
            file_output = self.data_outputs_by_name[name]
            workspace_id = file_output[u'workspace_id']
            if workspace_id in workspace_files:
                workspace_file_list = workspace_files[workspace_id]
            else:
                workspace_file_list = []
                workspace_files[workspace_id] = workspace_file_list
            data_file_entry = data_files[name]
            if isinstance(data_file_entry, list):
                for file_tuple in data_file_entry:
                    file_path = os.path.normpath(file_tuple[0])
                    if not os.path.isfile(file_path):
                        raise Exception('%s is not a valid file' % file_path)
                    params_by_file_path[file_path] = name
                    # Adjust file path to be relative to upload_dir
                    if len(file_tuple) == 2:
                        new_tuple = (os.path.relpath(file_path, upload_dir),
                                     file_tuple[1])
                    else:
                        new_tuple = (os.path.relpath(file_path, upload_dir),
                                     file_tuple[1], file_tuple[2])
                    workspace_file_list.append(new_tuple)
            else:
                file_path = os.path.normpath(data_file_entry[0])
                if not os.path.isfile(file_path):
                    raise Exception('%s is not a valid file' % file_path)
                params_by_file_path[file_path] = name
                # Adjust file path to be relative to upload_dir
                if len(data_file_entry) == 2:
                    new_tuple = (os.path.relpath(file_path, upload_dir),
                                 data_file_entry[1])
                else:
                    new_tuple = (os.path.relpath(file_path, upload_dir),
                                 data_file_entry[1], data_file_entry[2])
                workspace_file_list.append(new_tuple)

        data_file_store = DATA_FILE_STORE[u'DATA_FILE_STORE']
        if not data_file_store:
            raise Exception(u'No data file store found')
        stored_files = data_file_store.store_files(upload_dir, work_dir,
                                                   workspace_files,
                                                   self.get_input_file_ids(),
                                                   job_exe)

        # Organize results
        param_file_ids = {}  # Output parameter name -> file ID or list of file IDs
        for file_path in stored_files:
            file_id = stored_files[file_path]
            name = params_by_file_path[file_path]
            if isinstance(data_files[name], list):
                if name in param_file_ids:
                    file_id_list = param_file_ids[name]
                else:
                    file_id_list = []
                    param_file_ids[name] = file_id_list
                file_id_list.append(file_id)
            else:
                param_file_ids[name] = file_id

        # Create job results
        results = JobResults()
        for name in param_file_ids:
            param_entry = param_file_ids[name]
            if isinstance(param_entry, list):
                results.add_file_list_parameter(name, param_entry)
            else:
                results.add_file_parameter(name, param_entry)
        return results
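
For reference, a hedged sketch of the data_files argument this method expects, mirroring the test in Example no. 2; the parameter names are illustrative, and each path must be an existing file under the execution's output data directory. Note that the loop above also passes through an optional third tuple element (such as the geo_metadata built by perform_post_steps) when it is present:

    data_files = {
        u'Param1': (u'/outputs/path/1/my_file.txt', None),        # single file, media type unknown
        u'Param2': [(u'/outputs/path/2/my_file_2.txt', u'text/plain'),
                    (u'/outputs/path/3/my_file_3.txt', None)],    # multiple files for one parameter
    }
    results = job_data.store_output_data_files(data_files, job_exe)  # returns a JobResults instance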
Example no. 20
    def store_output_data_files(self, data_files, job_exe):
        """Stores the given data output files

        :param data_files: Dict with each file parameter name mapping to a tuple of absolute local file path and media
            type (media type is optionally None) for a single file parameter and a list of tuples for a multiple file
            parameter
        :type data_files: dict of str -> tuple(str, str) or list of tuple(str, str)
        :param job_exe: The job execution model (with related job and job_type fields) that is storing the output data
            files
        :type job_exe: :class:`job.models.JobExecution`
        :returns: The job results
        :rtype: :class:`job.configuration.results.job_results.JobResults`
        """

        upload_dir = get_job_exe_output_data_dir(job_exe.id)
        work_dir = get_job_exe_output_work_dir(job_exe.id)

        # Organize the data files
        workspace_files = {}  # Workspace ID -> list of (absolute local file path, media type)
        params_by_file_path = {}  # Absolute local file path -> output parameter name
        for name in data_files:
            file_output = self.data_outputs_by_name[name]
            workspace_id = file_output[u"workspace_id"]
            if workspace_id in workspace_files:
                workspace_file_list = workspace_files[workspace_id]
            else:
                workspace_file_list = []
                workspace_files[workspace_id] = workspace_file_list
            data_file_entry = data_files[name]
            if isinstance(data_file_entry, list):
                for file_tuple in data_file_entry:
                    file_path = os.path.normpath(file_tuple[0])
                    if not os.path.isfile(file_path):
                        raise Exception("%s is not a valid file" % file_path)
                    params_by_file_path[file_path] = name
                    # Adjust file path to be relative to upload_dir
                    if len(file_tuple) == 2:
                        new_tuple = (os.path.relpath(file_path, upload_dir), file_tuple[1])
                    else:
                        new_tuple = (os.path.relpath(file_path, upload_dir), file_tuple[1], file_tuple[2])
                    workspace_file_list.append(new_tuple)
            else:
                file_path = os.path.normpath(data_file_entry[0])
                if not os.path.isfile(file_path):
                    raise Exception("%s is not a valid file" % file_path)
                params_by_file_path[file_path] = name
                # Adjust file path to be relative to upload_dir
                if len(data_file_entry) == 2:
                    new_tuple = (os.path.relpath(file_path, upload_dir), data_file_entry[1])
                else:
                    new_tuple = (os.path.relpath(file_path, upload_dir), data_file_entry[1], data_file_entry[2])
                workspace_file_list.append(new_tuple)

        data_file_store = DATA_FILE_STORE[u"DATA_FILE_STORE"]
        if not data_file_store:
            raise Exception(u"No data file store found")
        stored_files = data_file_store.store_files(
            upload_dir, work_dir, workspace_files, self.get_input_file_ids(), job_exe
        )

        # Organize results
        param_file_ids = {}  # Output parameter name -> file ID or list of file IDs
        for file_path in stored_files:
            file_id = stored_files[file_path]
            name = params_by_file_path[file_path]
            if isinstance(data_files[name], list):
                if name in param_file_ids:
                    file_id_list = param_file_ids[name]
                else:
                    file_id_list = []
                    param_file_ids[name] = file_id_list
                file_id_list.append(file_id)
            else:
                param_file_ids[name] = file_id

        # Create job results
        results = JobResults()
        for name in param_file_ids:
            param_entry = param_file_ids[name]
            if isinstance(param_entry, list):
                results.add_file_list_parameter(name, param_entry)
            else:
                results.add_file_parameter(name, param_entry)
        return results
Example no. 21
    def perform_post_steps(self, job_exe, job_data, stdoutAndStderr):
        '''Stores the files and deletes any working directories

        :param job_exe: The job execution model with related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param stdoutAndStderr: the combined standard output and error output from the job execution
        :type stdoutAndStderr: str
        :return: A tuple of the job results and the results manifest generated by the job execution
        :rtype: (:class:`job.configuration.results.job_results.JobResults`,
            :class:`job.configuration.results.results_manifest.results_manifest.ResultsManifest`)
        '''

        manifest_data = {}
        job_output_dir = get_job_exe_output_data_dir(job_exe.id)
        path_to_manifest_file = os.path.join(job_output_dir,
                                             'results_manifest.json')
        if os.path.exists(path_to_manifest_file):
            logger.info('Opening results manifest...')
            with open(path_to_manifest_file, 'r') as manifest_file:
                manifest_data = json.loads(manifest_file.read())
                logger.info('Results manifest:')
                logger.info(manifest_data)
        else:
            logger.info('No results manifest found')

        results_manifest = ResultsManifest(manifest_data)
        stdout_files = self._get_artifacts_from_stdout(stdoutAndStderr)
        results_manifest.add_files(stdout_files)

        results_manifest.validate(self._output_file_manifest_dict)

        files_to_store = {}
        for manifest_file_entry in results_manifest.get_files():
            param_name = manifest_file_entry['name']

            media_type = None
            output_data_item = self._get_output_data_item_by_name(param_name)
            if output_data_item:
                media_type = output_data_item.get('media_type')

            if 'file' in manifest_file_entry:
                file_entry = manifest_file_entry['file']
                if 'geo_metadata' in file_entry:
                    files_to_store[param_name] = (file_entry['path'],
                                                  media_type,
                                                  file_entry['geo_metadata'])
                else:
                    files_to_store[param_name] = (file_entry['path'],
                                                  media_type)
            elif 'files' in manifest_file_entry:
                file_tuples = []
                for file_entry in manifest_file_entry['files']:
                    if 'geo_metadata' in file_entry:
                        file_tuples.append((file_entry['path'], media_type,
                                            file_entry['geo_metadata']))
                    else:
                        file_tuples.append((file_entry['path'], media_type))
                files_to_store[param_name] = file_tuples

        job_data_parse_results = {}  # parse results formatted for job_data
        for parse_result in results_manifest.get_parse_results():
            filename = parse_result['filename']
            assert filename not in job_data_parse_results
            geo_metadata = parse_result.get('geo_metadata', {})
            geo_json = geo_metadata.get('geo_json', None)
            data_started = geo_metadata.get('data_started', None)
            data_ended = geo_metadata.get('data_ended', None)
            data_types = parse_result.get('data_types', [])
            new_workspace_path = parse_result.get('new_workspace_path', None)
            work_dir = None
            if new_workspace_path:
                new_workspace_path = os.path.join(new_workspace_path, filename)
                work_dir = os.path.join(
                    get_job_exe_output_work_dir(job_exe.id),
                    'move_source_file_in_workspace')
            job_data_parse_results[filename] = (geo_json, data_started,
                                                data_ended, data_types,
                                                new_workspace_path, work_dir)

        job_data.save_parse_results(job_data_parse_results)
        return (job_data.store_output_data_files(files_to_store, job_exe), results_manifest)