def test_retrieve_input_data_files_success_multiple_input_file(self, retrieve_files, join):
    """Tests retrieving two files for one input that accepts multiple files

    NOTE(review): retrieve_files and join look like injected patch objects; their patch targets are not visible here.
    """

    input_paths = ['/scale/input/TEST_FILE_INPUT1', '/scale/input/TEST_FILE_INPUT2']
    input_definition = {
        'name': 'TEST_FILE_INPUT',
        'multiple': True,
        'required': True,
        'mediaTypes': [],
        'partial': False,
    }

    job_data = JobData({'files': {'TEST_FILE_INPUT': [1, 2]}})
    # File ids 1 and 2 resolve to the two paths, in order
    retrieve_files.return_value = dict(zip([1, 2], input_paths))
    data_files = [SeedInputFiles(input_definition)]

    result = job_data.retrieve_input_data_files(data_files)

    expected = {'TEST_FILE_INPUT': list(input_paths)}
    self.assertEqual(result, expected)
def test_capture_source_metadata_files(self, join):
    """Tests that a metadata file written next to an input is captured for the matching file id

    A metadata JSON file is written to the working directory, captured through
    JobResults._capture_source_metadata_files and compared against what was written.

    NOTE(review): join looks like an injected patch object; its patch target is not visible here.
    """

    input_files = [SeedInputFiles(x) for x in self.test_input_snippets]
    name_to_id = {"INPUT_FILE": [1]}
    metadata_name = 'INPUT_FILE.metadata.json'
    join.return_value = metadata_name
    metadata = {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': [0, 1]
        },
        'properties': {
            'dataStarted': '2018-06-01T00:00:00Z',
            'dataEnded': '2018-06-01T01:00:00Z',
            'dataTypes': ['one', 'two', 'three']
        }
    }

    with open(metadata_name, 'w') as metadata_file:
        json.dump(metadata, metadata_file)

    try:
        outputs = JobResults()._capture_source_metadata_files(input_files, name_to_id)
    finally:
        # Remove the scratch file even when the call under test raises, so a failing run does not leave it behind
        os.remove(metadata_name)

    self.assertEqual(len(outputs), 1)
    self.assertDictEqual(outputs[1].data, metadata)
def get_seed_input_files(self):
    """Get the input files wrapped as SeedInputFiles typed objects

    :return: list of typed input file elements, one per entry returned by get_input_files()
    :rtype: [`job.seed.types.SeedInputFiles`]
    """

    input_files = self.get_input_files()
    return [SeedInputFiles(input_file) for input_file in input_files]
def _get_seed_input_files(self): """ :return: typed instance of Input Files :rtype: [:class:`job.seed.types.SeedInputFiles`] """ return [SeedInputFiles(x) for x in self._get_input_files()]
def __init__(self, definition):
    """Creates a recipe definition object from the given dictionary. The definition is validated against the recipe
    schema and checked for duplicate input and job names, but the actual job details are not checked for correctness.

    :param definition: The recipe definition
    :type definition: dict

    :raises InvalidDefinition: If the given definition is invalid
    """

    self._definition = definition
    self._input_files_by_name = {}  # Name -> `job.seed.types.SeedInputFiles`
    self._input_json_by_name = {}  # Name -> `job.seed.types.SeedInputJson`
    self._jobs_by_name = {}  # Name -> job dict
    self._property_validation_dict = {}  # Property Input name -> required
    self._input_file_validation_dict = {}  # File Input name -> (required, multiple, file description)

    # Schema validation comes first so the lookups below can rely on the expected keys
    try:
        validate(definition, RECIPE_DEFINITION_SCHEMA)
    except ValidationError as ex:
        raise InvalidDefinition('Invalid recipe definition: %s' % unicode(ex))

    self._populate_default_values()

    version = self._definition['version']
    if version != DEFAULT_VERSION:
        raise InvalidDefinition('%s is an unsupported version number' % version)

    duplicate_input_msg = 'Invalid recipe definition: %s is a duplicate input data name'

    for file_dict in self._get_input_files():
        file_name = file_dict['name']
        if file_name in self._input_files_by_name:
            raise InvalidDefinition(duplicate_input_msg % file_name)
        self._input_files_by_name[file_name] = SeedInputFiles(file_dict)

    # JSON inputs share a namespace with file inputs, so both maps are checked
    for json_dict in self._get_input_json():
        json_name = json_dict['name']
        if json_name in self._input_json_by_name or json_name in self._input_files_by_name:
            raise InvalidDefinition(duplicate_input_msg % json_name)
        self._input_json_by_name[json_name] = SeedInputJson(json_dict)

    for job in self._definition['jobs']:
        job_name = job['name']
        if job_name in self._jobs_by_name:
            raise InvalidDefinition('Invalid recipe definition: %s is a duplicate job name' % job_name)
        self._jobs_by_name[job_name] = job

    # These checks run against the lookup tables populated above
    self._create_validation_dicts()
    self._validate_job_dependencies()
    self._validate_no_dup_job_inputs()
    self._validate_recipe_inputs()
def test_retrieve_input_data_files_missing_plurality_mismatch(self, retrieve_files, join):
    """Tests that retrieval raises when the job data references file id 1 but no files are retrieved

    NOTE(review): retrieve_files and join look like injected patch objects; their patch targets are not visible here.
    """

    input_definition = {
        'name': 'TEST_FILE_INPUT',
        'multiple': True,
        'required': True,
        'mediaTypes': [],
        'partial': False,
    }

    job_data = JobData({'files': {'TEST_FILE_INPUT': [1]}})
    # Nothing comes back for the requested file id
    retrieve_files.return_value = {}
    data_files = [SeedInputFiles(input_definition)]

    self.assertRaises(Exception, job_data.retrieve_input_data_files, data_files)
def test_retrieve_input_data_files_failure_multiple_for_single_input_file(self, retrieve_files, join):
    """Tests that retrieval raises when two files are supplied for an input declared with 'multiple': False

    NOTE(review): retrieve_files and join look like injected patch objects; their patch targets are not visible here.
    """

    input_definition = {
        'name': 'TEST_FILE_INPUT',
        'multiple': False,
        'required': True,
        'mediaTypes': [],
        'partial': False,
    }

    job_data = JobData({'files': {'TEST_FILE_INPUT': [1, 2]}})
    retrieve_files.return_value = {
        1: '/scale/input/TEST_FILE_INPUT1',
        2: '/scale/input/TEST_FILE_INPUT2',
    }
    data_files = [SeedInputFiles(input_definition)]

    self.assertRaises(Exception, job_data.retrieve_input_data_files, data_files)
def test_retrieve_input_data_files_missing_file_not_required(self, retrieve_files, join):
    """Tests that an input declared with 'required': False and absent from the job data yields an empty result

    NOTE(review): retrieve_files and join look like injected patch objects; their patch targets are not visible here.
    """

    input_definition = {
        'name': 'TEST_FILE_INPUT',
        'multiple': False,
        'required': False,
        'mediaTypes': [],
        'partial': False,
    }

    job_data = JobData({'files': {}})
    retrieve_files.return_value = {}
    data_files = [SeedInputFiles(input_definition)]

    result = job_data.retrieve_input_data_files(data_files)

    self.assertEqual(result, {})
def setup_job_dir(self, data_files):
    """Sets up the directory structure for a job execution and downloads the given files

    :param data_files: The input file definitions; each element is wrapped in a SeedInputFiles object before retrieval
    :type data_files: [dict]
    :returns: The result of retrieve_input_data_files(): each file parameter name mapping to a list of absolute file
        paths of the written files
    :rtype: {string: [string]}
    """

    seed_input_files = [SeedInputFiles(input_file) for input_file in data_files]

    # Download the job execution input files and hand the retrieved paths back to the caller, as documented
    return self.retrieve_input_data_files(seed_input_files)
def validate_data(self, job_data):
    """Checks the given job data against the input files, input JSON and output files of this interface

    :param job_data: The job data
    :type job_data: :class:`job.data.job_data.JobData`
    :returns: A list of warnings discovered during validation.
    :rtype: list[:class:`job.data.job_data.ValidationWarning`]

    :raises :class:`job.data.exceptions.InvalidData`: If there is a configuration problem.
    """

    warnings = []

    seed_files = [SeedInputFiles(input_file) for input_file in self.get_input_files()]
    warnings.extend(job_data.validate_input_files(seed_files))

    seed_json = [SeedInputJson(input_json) for input_json in self.get_input_json()]
    warnings.extend(job_data.validate_input_json(seed_json))

    # Output files are validated by name only
    output_names = [output_file['name'] for output_file in self.get_output_files()]
    warnings.extend(job_data.validate_output_files(output_names))

    return warnings
def validate_connection(self, job_conn):
    """Checks that the given job connection can provide the data needed to run a job with this interface

    :param job_conn: The job connection
    :type job_conn: :class:`job.seed.data.job_connection.JobConnection`
    :returns: A list of warnings discovered during validation.
    :rtype: list[:class:`job.data.job_data.ValidationWarning`]

    :raises :class:`job.data.exceptions.InvalidConnection`: If there is a configuration problem.
    """

    warnings = []

    seed_files = [SeedInputFiles(input_file) for input_file in self.get_input_files()]
    warnings.extend(job_conn.validate_input_files(seed_files))

    seed_json = [SeedInputJson(input_json) for input_json in self.get_input_json()]
    warnings.extend(job_conn.validate_properties(seed_json))

    # Make sure connection has a workspace if the interface has any output files
    if self.get_output_files() and not job_conn.has_workspace():
        raise InvalidConnection('No workspace provided for output files')

    return warnings