Beispiel #1
0
    def validate(self, output_file_definitions):
        """Checks the results manifest against the given output file definitions.

        Manifest entries whose ``files`` list is empty are ignored, so such an output is treated as absent. This
        does not validate that the parse_data matches the job data inputs.

        :param output_file_definitions: A dictionary with each output param name mapped to a tuple with
            (is_multiple (bool), required(bool))
        :type output_file_definitions: dict of tuples
        :raises MissingRequiredOutput: If a required output has no usable entry in the manifest
        :raises InvalidResultsManifest: If an entry lacks the ``files`` object (multiple outputs) or the ``file``
            object (single outputs)
        """

        self._trim(output_file_definitions)

        # Index the usable manifest entries by output name
        entries_by_name = {}
        for entry in self._json_manifest['output_data']:
            name = entry['name']
            has_empty_files = 'files' in entry and not entry['files']
            if not has_empty_files:
                entries_by_name[name] = entry

        for file_name, definition in output_file_definitions.items():
            is_multiple, is_required = definition

            if file_name in entries_by_name:
                entry = entries_by_name[file_name]
                if is_multiple:
                    if 'files' not in entry:
                        msg = 'The output parameter %s must have a files object in the results manifest' % file_name
                        raise InvalidResultsManifest(msg)
                elif 'file' not in entry:
                    msg = 'The output parameter %s must have a file object in the results manifest' % file_name
                    raise InvalidResultsManifest(msg)
            elif is_required:
                raise MissingRequiredOutput(file_name)
Beispiel #2
0
    def validate(self, output_file_definitions):
        """Checks the results manifest against the given output file definitions.

        This does not validate that the parse_data matches the job data inputs.

        :param output_file_definitions: A dictionary with each output param name mapped to a tuple with
            (is_multiple (bool), required(bool))
        :type output_file_definitions: dict of tuples
        :raises MissingRequiredOutput: If a required output has no entry in the manifest
        :raises InvalidResultsManifest: If an entry lacks the ``paths`` object (multiple outputs) or the ``path``
            object (single outputs)
        """

        self._trim(output_file_definitions)

        # Index the manifest entries by output name
        entries_by_name = {entry['name']: entry for entry in self._json_manifest['files']}

        for file_name, definition in output_file_definitions.items():
            is_multiple, is_required = definition

            if file_name in entries_by_name:
                entry = entries_by_name[file_name]
                if is_multiple:
                    if 'paths' not in entry:
                        msg = 'The output parameter %s must have a paths object in the results manifest' % file_name
                        raise InvalidResultsManifest(msg)
                elif 'path' not in entry:
                    msg = 'The output parameter %s must have a path object in the results manifest' % file_name
                    raise InvalidResultsManifest(msg)
            elif is_required:
                raise MissingRequiredOutput(file_name)
    def _validate_manifest(self):
        '''Checks the manifest rules that the json_schema cannot express: output names must be unique and every
        output_data entry must carry exactly one of file or files

        :raises InvalidResultsManifest: If any of these rules is broken
        '''
        seen_names = set()
        for entry in self._json_manifest[u'output_data']:
            name = entry[u'name']
            if name in seen_names:
                raise InvalidResultsManifest(u'output names cannot be repeated')
            seen_names.add(name)

            has_file = u'file' in entry
            has_files = u'files' in entry
            if has_file and has_files:
                raise InvalidResultsManifest(u'an output_data entry can only have file or files, not both')
            if not (has_file or has_files):
                raise InvalidResultsManifest(u'an output_data entry must have either file or files')
    def __init__(self, json_manifest=None):
        '''Builds a result manifest from the given dict, using an empty manifest when none is given

        A manifest whose version differs from MANIFEST_VERSION is passed through _convert_schema first. The
        manifest is then checked against RESULTS_MANIFEST_SCHEMA, defaults are populated and the remaining
        manifest rules are validated.

        :param json_manifest: a dict in the format described by RESULTS_MANIFEST_SCHEMA
        :type json_manifest: dict
        :raises InvalidResultsManifest: If the manifest does not match the schema
        '''

        manifest = {} if json_manifest is None else json_manifest

        # A manifest without an explicit version is treated as the current version
        version = manifest[u'version'] if u'version' in manifest else MANIFEST_VERSION
        if version != MANIFEST_VERSION:
            manifest = self._convert_schema(manifest)

        self._json_manifest = manifest

        try:
            validate(manifest, RESULTS_MANIFEST_SCHEMA)
        except ValidationError as schema_error:
            raise InvalidResultsManifest(schema_error)

        self._populate_defaults()
        self._validate_manifest()
Beispiel #5
0
def parse_geo_json(geo_json):
    """Parses GeoJSON and returns a geometry object and metadata.

    A Feature contributes its geometry and, when present, its properties. For a FeatureCollection only the first
    feature is used; an empty collection yields neither geometry nor properties. Any other type is treated as a
    bare geometry.

    :param geo_json: The geo json to parse
    :type geo_json: dict
    :rtype: GEOSGeometry, dict
    :returns: the geometry and metadata, either of which may be None
    :raises InvalidResultsManifest: If the geometry cannot be parsed
    """

    geom = None
    geom_json = None
    props = None

    geo_type = geo_json['type']
    if geo_type in ('Feature', 'FeatureCollection'):
        if geo_type == 'Feature':
            feature = geo_json
        else:
            # Currently handles collections by just grabbing first entry. An empty collection has no first
            # entry, so there is nothing to parse.
            features = geo_json['features']
            feature = features[0] if features else None

        if feature is not None:
            geom_json = feature['geometry']
            if 'properties' in feature:
                props = feature['properties']
    else:
        # The GeoJSON is just a geometry
        geom_json = geo_json

    # Parse geometry (skipped when the feature has a null geometry)
    if geom_json:
        try:
            geom = geos.GEOSGeometry(json.dumps(geom_json), srid=4326)
        except geos.GEOSException as geos_error:
            raise InvalidResultsManifest(str(geos_error))

    return geom, props
Beispiel #6
0
    def test_determine_error(self):
        """Tests that a failed post-task resolves each Scale error's exit code to the error with the matching name"""

        errors_to_check = (
            ScaleDatabaseError(),
            ScaleIOError(),
            ScaleOperationalError(),
            InvalidResultsManifest(''),
            MissingRequiredOutput(''),
        )

        for expected_error in errors_to_check:
            exe_config = ExecutionConfiguration()
            exe_config.create_tasks(['pre'])
            exe_config.set_task_ids(self.job_exe.get_cluster_id())
            post_task = PostTask('agent_1', self.job_exe, self.job_exe.job_type, exe_config)

            # Move the task to RUNNING before reporting the failure
            running_update = job_test_utils.create_task_status_update(post_task.id, post_task.agent_id,
                                                                      TaskStatusUpdate.RUNNING, now())
            post_task.update(running_update)

            failed_update = job_test_utils.create_task_status_update(post_task.id, post_task.agent_id,
                                                                     TaskStatusUpdate.FAILED, now(),
                                                                     exit_code=expected_error.exit_code)
            determined_error = post_task.determine_error(failed_update)
            self.assertEqual(expected_error.error_name, determined_error.name)
Beispiel #7
0
    def _validate_manifest(self):
        """Checks the manifest rules that the json_schema cannot express: output names must be unique and every
        output_data entry must carry exactly one of file or files

        :raises InvalidResultsManifest: If any of these rules is broken
        """
        seen_names = set()
        for entry in self._json_manifest['output_data']:
            name = entry['name']
            if name in seen_names:
                msg = 'The output parameter %s appears multiple times in the results manifest' % name
                raise InvalidResultsManifest(msg)
            seen_names.add(name)

            has_file = 'file' in entry
            has_files = 'files' in entry
            if has_file and has_files:
                msg = 'The output parameter %s cannot have both file and files objects in the results manifest'
                raise InvalidResultsManifest(msg % name)
            if not (has_file or has_files):
                msg = 'The output parameter %s must have either a file or files object in the results manifest'
                raise InvalidResultsManifest(msg % name)
Beispiel #8
0
    def ready(self):
        """Registers the job errors and the job message types"""

        # Register job errors
        from error.exceptions import register_error
        from job.configuration.exceptions import MissingMount, MissingSetting
        from job.configuration.results.exceptions import InvalidResultsManifest, MissingRequiredOutput

        job_error_classes = (InvalidResultsManifest, MissingMount, MissingRequiredOutput, MissingSetting)
        for error_class in job_error_classes:
            register_error(error_class(''))

        # Register job message types
        from job.messages.blocked_jobs import BlockedJobs
        from job.messages.failed_jobs import FailedJobs
        from job.messages.job_exe_end import CreateJobExecutionEnd
        from job.messages.pending_jobs import PendingJobs
        from job.messages.running_jobs import RunningJobs
        from messaging.messages.factory import add_message_type

        job_message_classes = (BlockedJobs, FailedJobs, CreateJobExecutionEnd, PendingJobs, RunningJobs)
        for message_class in job_message_classes:
            add_message_type(message_class)
Beispiel #9
0
    def _convert_schema(self, json_manifest):
        '''Convert the previous manifest schema to the 1.0 manifest schema

        No manifest versions before 1.0 are supported, so this always raises.

        :param json_manifest: The old manifest
        :type json_manifest: dict
        :return: converted manifest
        :rtype: dict
        :raises InvalidResultsManifest: Always, with a message naming the unsupported version
        '''
        # There are no supported manifest versions before 1.0. Name the offending version so the failure can be
        # diagnosed; the one-element tuple keeps the formatting safe for any version value.
        version = json_manifest.get(u'version')
        raise InvalidResultsManifest(u'%s is an invalid results manifest version' % (version,))
Beispiel #10
0
    def ready(self):
        """Registers the job errors"""
        from error.exceptions import register_error
        from job.configuration.exceptions import MissingMount, MissingSetting
        from job.configuration.results.exceptions import InvalidResultsManifest, MissingRequiredOutput

        job_error_classes = (InvalidResultsManifest, MissingMount, MissingRequiredOutput, MissingSetting)
        for error_class in job_error_classes:
            register_error(error_class(''))
Beispiel #11
0
    def _convert_schema(self, json_manifest):
        """Convert the previous manifest schema to the 1.0 manifest schema

        No manifest versions before 1.0 are supported, so this always raises.

        :param json_manifest: The old manifest
        :type json_manifest: dict
        :return: converted manifest
        :rtype: dict
        :raises InvalidResultsManifest: Always, with a message naming the manifest's version
        """

        # There are no supported manifest versions before 1.0
        unsupported_version = json_manifest['version']
        message = '%s is an invalid results manifest version' % unsupported_version
        raise InvalidResultsManifest(message)
Beispiel #12
0
    def __init__(self, json_manifest):
        '''Builds a result manifest from the given dict

        The manifest is stored, checked against RESULTS_MANIFEST_SCHEMA, filled in with defaults and finally run
        through the manifest rules that the schema cannot express.

        :param json_manifest: a dict in the format described by RESULTS_MANIFEST_SCHEMA
        :type json_manifest: dict
        :raises InvalidResultsManifest: If the manifest does not match the schema
        '''

        self._json_manifest = json_manifest

        try:
            validate(json_manifest, RESULTS_MANIFEST_SCHEMA)
        except ValidationError as schema_error:
            raise InvalidResultsManifest(schema_error)
        else:
            self._populate_defaults()
            self._validate_manifest()
Beispiel #13
0
    def ready(self):
        """Registers the job errors and the job message types"""

        # Register job errors
        from error.exceptions import register_error
        from job.configuration.results.exceptions import InvalidResultsManifest, MissingRequiredOutput, UnexpectedMultipleOutputs
        from job.execution.configuration.exceptions import MissingMount, MissingSetting

        job_error_classes = (
            InvalidResultsManifest,
            MissingMount,
            MissingRequiredOutput,
            UnexpectedMultipleOutputs,
            MissingSetting,
        )
        for error_class in job_error_classes:
            register_error(error_class(''))

        # Register job message types
        from job.messages.blocked_jobs import BlockedJobs
        from job.messages.cancel_jobs import CancelJobs
        from job.messages.cancel_jobs_bulk import CancelJobsBulk
        from job.messages.completed_jobs import CompletedJobs
        from job.messages.create_jobs import CreateJobs
        from job.messages.failed_jobs import FailedJobs
        from job.messages.job_exe_end import CreateJobExecutionEnd
        from job.messages.pending_jobs import PendingJobs
        from job.messages.process_job_input import ProcessJobInput
        from job.messages.publish_job import PublishJob
        from job.messages.purge_jobs import PurgeJobs
        from job.messages.running_jobs import RunningJobs
        from job.messages.spawn_delete_files_job import SpawnDeleteFilesJob
        from job.messages.uncancel_jobs import UncancelJobs
        from job.messages.unpublish_jobs import UnpublishJobs
        from messaging.messages.factory import add_message_type

        job_message_classes = (
            BlockedJobs,
            CancelJobs,
            CancelJobsBulk,
            CompletedJobs,
            CreateJobs,
            FailedJobs,
            CreateJobExecutionEnd,
            PendingJobs,
            ProcessJobInput,
            PublishJob,
            PurgeJobs,
            RunningJobs,
            SpawnDeleteFilesJob,
            UncancelJobs,
            UnpublishJobs,
        )
        for message_class in job_message_classes:
            add_message_type(message_class)
    def test_scale_post_steps_invalid_manifest_error(self,
                                                     mock_job_exe_manager,
                                                     mock_sys_exit):
        """Tests that scale_post_steps exits with the invalid manifest exit code when the post steps raise
        InvalidResultsManifest."""

        # Set up mocks: performing the post steps fails with an invalid manifest
        mocked_job_exe = mock_job_exe_manager.get_job_exe_with_job_and_job_type.return_value
        mocked_post_steps = mocked_job_exe.get_job_interface.return_value.perform_post_steps
        mocked_post_steps.side_effect = InvalidResultsManifest('')

        # Call method to test
        argv = ['manage.py', 'scale_post_steps', '-i', str(self.job_exe.id)]
        PostCommand().run_from_argv(argv)

        # Check results
        expected_exit_code = InvalidResultsManifest('').exit_code
        mock_sys_exit.assert_called_with(expected_exit_code)
Beispiel #15
0
    def test_scale_post_steps_invalid_manifest_error(self, mock_env_vars,
                                                     mock_job_exe_manager,
                                                     mock_sys_exit):
        """Tests that scale_post_steps exits with the invalid manifest exit code when the post steps raise
        InvalidResultsManifest."""

        # Set up mocks
        def fake_env_var(name, *args, **kwargs):
            # SCALE_JOB_ID resolves to the job ID; every other lookup resolves to the execution number
            if name == 'SCALE_JOB_ID':
                return str(self.job.id)
            return str(self.job_exe.exe_num)

        mock_env_vars.side_effect = fake_env_var

        # Performing the post steps fails with an invalid manifest
        mocked_job_exe = mock_job_exe_manager.get_job_exe_with_job_and_job_type.return_value
        mocked_post_steps = mocked_job_exe.job_type.get_job_interface.return_value.perform_post_steps
        mocked_post_steps.side_effect = InvalidResultsManifest('')

        # Call method to test
        PostCommand().run_from_argv(['manage.py', 'scale_post_steps'])

        # Check results
        expected_exit_code = InvalidResultsManifest('').exit_code
        mock_sys_exit.assert_called_with(expected_exit_code)
Beispiel #16
0
    def perform_post_steps(self, job_exe, job_data, stdoutAndStderr):
        """Stores the output files listed in the results manifest and saves any parse results

        The manifest is read from ``results_manifest.json`` under SCALE_JOB_EXE_OUTPUT_PATH when that file exists;
        otherwise an empty manifest is used. Artifacts found in the given output text are added to the manifest
        before it is validated against this interface's output definitions.

        :param job_exe: The job execution model with related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param stdoutAndStderr: the standard out from the job execution
        :type stdoutAndStderr: str
        :return: A tuple of the job results and the results manifest generated by the job execution
        :rtype: (:class:`job.configuration.results.job_results.JobResults`,
            :class:`job.configuration.results.results_manifest.results_manifest.ResultsManifest`)
        :raises InvalidResultsManifest: If an output references a path that is not an existing file, or if the
            same filename appears in more than one parse result. Building and validating the manifest may raise
            as well.
        """

        manifest_data = {}
        path_to_manifest_file = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'results_manifest.json')
        if os.path.exists(path_to_manifest_file):
            logger.info('Opening results manifest...')
            with open(path_to_manifest_file, 'r') as manifest_file:
                manifest_data = json.load(manifest_file)
            logger.info('Results manifest:')
            logger.info(manifest_data)
        else:
            logger.info('No results manifest found')

        results_manifest = ResultsManifest(manifest_data)
        stdout_files = self._get_artifacts_from_stdout(stdoutAndStderr)
        results_manifest.add_files(stdout_files)

        results_manifest.validate(self._output_file_manifest_dict)

        def build_file_tuple(param_name, media_type, file_entry):
            # Builds the (path, media_type[, geo_metadata]) tuple for one manifest file entry, rejecting paths
            # that are not existing files
            file_path = file_entry['path']
            if not os.path.isfile(file_path):
                raise InvalidResultsManifest('Output %s has invalid/missing file path "%s"' % (param_name, file_path))
            if 'geo_metadata' in file_entry:
                return (file_path, media_type, file_entry['geo_metadata'])
            return (file_path, media_type)

        files_to_store = {}
        for manifest_file_entry in results_manifest.get_files():
            param_name = manifest_file_entry['name']

            media_type = None
            output_data_item = self._get_output_data_item_by_name(param_name)
            if output_data_item:
                media_type = output_data_item.get('media_type')

            # A single-file output maps to one tuple, a multi-file output to a list of tuples
            if 'file' in manifest_file_entry:
                files_to_store[param_name] = build_file_tuple(param_name, media_type, manifest_file_entry['file'])
            elif 'files' in manifest_file_entry:
                files_to_store[param_name] = [build_file_tuple(param_name, media_type, file_entry)
                                              for file_entry in manifest_file_entry['files']]

        job_data_parse_results = {}  # parse results formatted for job_data
        for parse_result in results_manifest.get_parse_results():
            filename = parse_result['filename']
            # The filenames come from the job's manifest, so a duplicate is reported as a manifest error rather
            # than checked with an assert (asserts are stripped when Python runs with -O)
            if filename in job_data_parse_results:
                raise InvalidResultsManifest('Parse results contain duplicate filename "%s"' % filename)
            geo_metadata = parse_result.get('geo_metadata', {})
            geo_json = geo_metadata.get('geo_json', None)
            data_started = geo_metadata.get('data_started', None)
            data_ended = geo_metadata.get('data_ended', None)
            data_types = parse_result.get('data_types', [])
            new_workspace_path = parse_result.get('new_workspace_path', None)
            if new_workspace_path:
                new_workspace_path = os.path.join(new_workspace_path, filename)
            job_data_parse_results[filename] = (geo_json, data_started, data_ended, data_types, new_workspace_path)

        job_data.save_parse_results(job_data_parse_results)
        return (job_data.store_output_data_files(files_to_store, job_exe), results_manifest)