Ejemplo n.º 1
0
    def test_process_job_output(self):
        """Tests that JobManager.process_job_output() only populates output for jobs that are completed and whose
        execution has results
        """

        first_output = JobResults()
        first_output.add_file_parameter('foo', 1)
        second_output = JobResults()
        second_output.add_file_parameter('foo', 2)
        expected_outputs = (first_output, second_output)

        # Group 1: completed jobs whose executions carry results
        completed_exes = [
            job_test_utils.create_job_exe(status='COMPLETED', output=job_output)
            for job_output in expected_outputs
        ]

        # Group 2: execution results exist, but the jobs are still running
        running_exes = [job_test_utils.create_job_exe(status='RUNNING') for _ in range(2)]
        for running_exe in running_exes:
            exe_output = JobExecutionOutput()
            exe_output.job_exe_id = running_exe.id
            exe_output.job_id = running_exe.job_id
            exe_output.job_type_id = running_exe.job.job_type_id
            exe_output.exe_num = running_exe.exe_num
            exe_output.output = JobResults().get_dict()
            exe_output.save()

        # Group 3: jobs marked completed, but no execution results were saved
        resultless_exes = [job_test_utils.create_job_exe(status='RUNNING') for _ in range(2)]
        for resultless_job in [exe.job for exe in resultless_exes]:
            resultless_job.status = 'COMPLETED'
            resultless_job.save()

        # Test method
        all_exes = completed_exes + running_exes + resultless_exes
        job_ids = [exe.job_id for exe in all_exes]
        result_ids = Job.objects.process_job_output(job_ids, timezone.now())

        self.assertEqual(set(result_ids), {exe.job_id for exe in completed_exes})

        # Only the first two jobs (group 1) should have output populated
        jobs = list(Job.objects.filter(id__in=job_ids).order_by('id'))
        self.assertEqual(len(jobs), 6)
        for job, expected_output in zip(jobs[:2], expected_outputs):
            self.assertTrue(job.has_output())
            self.assertDictEqual(job.output, expected_output.get_dict())
        for job in jobs[2:]:
            self.assertFalse(job.has_output())
Ejemplo n.º 2
0
    def test_successful_file(self):
        """Tests that JobResults.add_output_to_data() passes a file parameter's ID to the job data as a file input"""

        file_id = 1337
        output_name, input_name = 'foo', 'bar'

        job_results = JobResults()
        job_results.add_file_parameter(output_name, file_id)

        # The job data is mocked so the resulting add_file_input() call can be inspected
        mock_job_data = MagicMock()
        job_results.add_output_to_data(output_name, mock_job_data, input_name)

        mock_job_data.add_file_input.assert_called_with(input_name, file_id)
Ejemplo n.º 3
0
    def test_successful_file(self):
        """Tests that JobResults.add_output_to_data() passes a file parameter's ID to the job data as a file input"""

        names = {'output': u'foo', 'input': u'bar'}
        file_id = 1337

        job_results = JobResults()
        job_results.add_file_parameter(names['output'], file_id)

        # The job data is mocked so the resulting add_file_input() call can be inspected
        mock_job_data = MagicMock()
        job_results.add_output_to_data(names['output'], mock_job_data, names['input'])

        mock_job_data.add_file_input.assert_called_with(names['input'], file_id)
Ejemplo n.º 4
0
    def store_output_data_files(self, data_files, job_exe):
        """Stores the given data output files

        Each file's local path is normalized and written back onto its metadata object (for single files and for
        files within a list alike) before the files are handed to the data file store.

        :param data_files: Dict with each file parameter name mapping to either a single ProductFileMetadata object or
            a list of ProductFileMetadata objects
        :type data_files: {string: `ProductFileMetadata`} or {string: [`ProductFileMetadata`]}
        :param job_exe: The job execution model (with related job and job_type fields) that is storing the output data
            files
        :type job_exe: :class:`job.models.JobExecution`
        :returns: The job results
        :rtype: :class:`job.configuration.results.job_results.JobResults`
        :raises Exception: If a local path is not an existing file or if no data file store is configured
        """

        # Organize the data files
        workspace_files = {}  # Workspace ID -> [ProductFileMetadata]
        params_by_file_path = {}  # Normalized local file path -> output parameter name
        output_workspaces = JobData.create_output_workspace_dict(
            data_files.keys(), self, job_exe)
        for name in data_files:
            workspace_file_list = workspace_files.setdefault(output_workspaces[name], [])
            data_file_entry = data_files[name]
            # Treat a single file parameter as a one-element list so both kinds share the same handling
            if isinstance(data_file_entry, list):
                file_entries = data_file_entry
            else:
                file_entries = [data_file_entry]
            for file_entry in file_entries:
                file_path = os.path.normpath(file_entry.local_path)
                if not os.path.isfile(file_path):
                    raise Exception('%s is not a valid file' % file_path)
                params_by_file_path[file_path] = name
                # Keep the entry's path identical to the key used in params_by_file_path. Previously only the
                # single-file branch did this, so list entries with non-normalized paths could fail the lookup below.
                # NOTE(review): the store is presumed to key its results by each entry's local_path - confirm
                file_entry.local_path = file_path
                workspace_file_list.append(file_entry)

        data_file_store = DATA_FILE_STORE['DATA_FILE_STORE']
        if not data_file_store:
            raise Exception('No data file store found')
        stored_files = data_file_store.store_files(workspace_files,
                                                   self.get_input_file_ids(),
                                                   job_exe)

        # Organize results
        param_file_ids = {}  # Output parameter name -> file ID or [file IDs]
        for file_path in stored_files:
            file_id = stored_files[file_path]
            name = params_by_file_path[file_path]
            if isinstance(data_files[name], list):
                param_file_ids.setdefault(name, []).append(file_id)
            else:
                param_file_ids[name] = file_id

        # Create job results
        results = JobResults()
        for name in param_file_ids:
            param_entry = param_file_ids[name]
            if isinstance(param_entry, list):
                results.add_file_list_parameter(name, param_entry)
            else:
                results.add_file_parameter(name, param_entry)
        return results
Ejemplo n.º 5
0
    def store_output_data_files(self, data_files, job_exe):
        """Stores the given data output files

        :param data_files: Dict with each file parameter name mapping to a tuple for a single file parameter, or a list
            of tuples for a multiple file parameter. Each tuple starts with the absolute local file path and the media
            type (media type is optionally None); when a tuple has more than two items, its third item is passed along
            to the data file store as well
        :type data_files: {string: tuple(string, string)} or [tuple(string, string)]
        :param job_exe: The job execution model (with related job and job_type fields) that is storing the output data
            files
        :type job_exe: :class:`job.models.JobExecution`
        :returns: The job results
        :rtype: :class:`job.configuration.results.job_results.JobResults`
        """

        # Group the files to store by the workspace configured for their output parameter
        files_by_workspace = {}  # Workspace ID -> [(normalized path, media type, parameter name[, third item])]
        param_by_path = {}  # Normalized local file path -> output parameter name
        for name in data_files:
            output_config = self.data_outputs_by_name[name]
            files_for_workspace = files_by_workspace.setdefault(output_config['workspace_id'], [])
            entry = data_files[name]
            # A single file parameter is handled as a list of one tuple
            file_tuples = entry if isinstance(entry, list) else [entry]
            for file_tuple in file_tuples:
                normalized_path = os.path.normpath(file_tuple[0])
                if not os.path.isfile(normalized_path):
                    raise Exception('%s is not a valid file' % normalized_path)
                param_by_path[normalized_path] = name
                if len(file_tuple) == 2:
                    store_tuple = (normalized_path, file_tuple[1], name)
                else:
                    store_tuple = (normalized_path, file_tuple[1], name, file_tuple[2])
                files_for_workspace.append(store_tuple)

        data_file_store = DATA_FILE_STORE['DATA_FILE_STORE']
        if not data_file_store:
            raise Exception('No data file store found')
        stored_files = data_file_store.store_files(files_by_workspace, self.get_input_file_ids(), job_exe)

        # Map each output parameter to its file ID (single file) or list of file IDs (multiple files)
        ids_by_param = {}
        for stored_path in stored_files:
            stored_id = stored_files[stored_path]
            name = param_by_path[stored_path]
            if isinstance(data_files[name], list):
                ids_by_param.setdefault(name, []).append(stored_id)
            else:
                ids_by_param[name] = stored_id

        # Build the job results from the collected IDs
        results = JobResults()
        for name in ids_by_param:
            ids_entry = ids_by_param[name]
            if isinstance(ids_entry, list):
                results.add_file_list_parameter(name, ids_entry)
            else:
                results.add_file_parameter(name, ids_entry)
        return results
Ejemplo n.º 6
0
    def store_output_data_files(self, data_files, job_exe):
        """Stores the given data output files

        :param data_files: Dict with each file parameter name mapping to a tuple for a single file parameter, or a list
            of tuples for a multiple file parameter. Each tuple starts with the absolute local file path and the media
            type (media type is optionally None); when a tuple has more than two items, its third item is passed along
            to the data file store as well
        :type data_files: dict of str -> tuple(str, str) or list of tuple(str, str)
        :param job_exe: The job execution model (with related job and job_type fields) that is storing the output data
            files
        :type job_exe: :class:`job.models.JobExecution`
        :returns: The job results
        :rtype: :class:`job.configuration.results.job_results.JobResults`
        """

        # Group the files to store by the workspace configured for their output parameter
        files_by_workspace = {}  # Workspace ID -> list of (normalized path, media type[, third item])
        param_by_path = {}  # Normalized local file path -> output parameter name
        for name in data_files:
            workspace_id = self.data_outputs_by_name[name]['workspace_id']
            files_for_workspace = files_by_workspace.setdefault(workspace_id, [])
            entry = data_files[name]
            # A single file parameter is handled as a list of one tuple
            if not isinstance(entry, list):
                entry = [entry]
            for file_tuple in entry:
                normalized_path = os.path.normpath(file_tuple[0])
                if not os.path.isfile(normalized_path):
                    raise Exception('%s is not a valid file' % normalized_path)
                param_by_path[normalized_path] = name
                has_only_path_and_type = len(file_tuple) == 2
                files_for_workspace.append(
                    (normalized_path, file_tuple[1]) if has_only_path_and_type
                    else (normalized_path, file_tuple[1], file_tuple[2]))

        data_file_store = DATA_FILE_STORE['DATA_FILE_STORE']
        if not data_file_store:
            raise Exception('No data file store found')
        stored_files = data_file_store.store_files(files_by_workspace, self.get_input_file_ids(), job_exe)

        # Map each output parameter to its file ID (single file) or list of file IDs (multiple files)
        ids_by_param = {}
        for stored_path in stored_files:
            stored_id = stored_files[stored_path]
            name = param_by_path[stored_path]
            if not isinstance(data_files[name], list):
                ids_by_param[name] = stored_id
                continue
            ids_by_param.setdefault(name, []).append(stored_id)

        # Build the job results from the collected IDs
        results = JobResults()
        for name in ids_by_param:
            ids_entry = ids_by_param[name]
            if isinstance(ids_entry, list):
                results.add_file_list_parameter(name, ids_entry)
            else:
                results.add_file_parameter(name, ids_entry)
        return results
Ejemplo n.º 7
0
    def test_successful_supersede_mixed(self):
        """Tests that QueueManager.queue_new_recipe() can supersede a recipe in which the output of a Seed job feeds
        the input of a legacy job
        """

        def get_recipe_node(recipe_id, node_name):
            """Fetches the named recipe node, with its job, from the database"""
            return RecipeNode.objects.select_related('job').get(recipe_id=recipe_id, node_name=node_name)

        workspace = storage_test_utils.create_workspace()
        source_file = source_test_utils.create_source(workspace=workspace)
        event = trigger_test_utils.create_trigger_event()

        # Job type 1 is a Seed job type
        seed_manifest = {
            'seedVersion': '1.0.0',
            'job': {
                'name': 'job-type-a',
                'jobVersion': '1.0.0',
                'packageVersion': '1.0.0',
                'title': 'Job Type 1',
                'description': 'This is a description',
                'maintainer': {'name': 'John Doe', 'email': '*****@*****.**'},
                'timeout': 10,
                'interface': {
                    'command': '',
                    'inputs': {'files': [{'name': 'test-input-a'}]},
                    'outputs': {'files': [{'name': 'test-output-a', 'pattern': '*.png'}]},
                },
            },
        }
        job_type_1 = job_test_utils.create_seed_job_type(manifest=seed_manifest)

        # Job type 2 is a legacy job type
        legacy_interface = {
            'version': '1.0',
            'command': 'test_command',
            'command_arguments': 'test_arg',
            'input_data': [{'name': 'Test Input 2', 'type': 'file', 'media_types': ['image/png', 'image/tiff']}],
            'output_data': [{'name': 'Test Output 2', 'type': 'file'}],
        }
        job_type_2 = job_test_utils.create_job_type(interface=legacy_interface)

        # Recipe where the output of job 1 is connected to the input of job 2
        definition = {
            'version': '1.0',
            'input_data': [{'name': 'Recipe Input', 'type': 'file', 'media_types': ['text/plain']}],
            'jobs': [
                {
                    'name': 'Job 1',
                    'job_type': {'name': job_type_1.name, 'version': job_type_1.version},
                    'recipe_inputs': [{'recipe_input': 'Recipe Input', 'job_input': 'test-input-a'}],
                },
                {
                    'name': 'Job 2',
                    'job_type': {'name': job_type_2.name, 'version': job_type_2.version},
                    'dependencies': [{
                        'name': 'Job 1',
                        'connections': [{'output': 'test-output-a', 'input': 'Test Input 2'}],
                    }],
                },
            ],
        }

        recipe_definition = RecipeDefinition(definition)
        recipe_definition.validate_job_interfaces()

        recipe_type = recipe_test_utils.create_recipe_type(definition=definition)

        recipe_data = LegacyRecipeData({
            'version': '1.0',
            'input_data': [{'name': 'Recipe Input', 'file_id': source_file.id}],
            'workspace_id': workspace.id,
        })

        # Queue initial recipe and complete its first job
        handler = Queue.objects.queue_new_recipe(recipe_type, recipe_data, event)
        recipe = Recipe.objects.get(id=handler.recipe.id)
        recipe_job_1 = get_recipe_node(handler.recipe.id, 'Job 1')
        Job.objects.update_jobs_to_running([recipe_job_1.job], now())
        results = JobResults()
        results.add_file_parameter('test-output-a', product_test_utils.create_product().id)
        job_test_utils.create_job_exe(job=recipe_job_1.job, status='COMPLETED', output=results)
        Job.objects.update_jobs_to_completed([recipe_job_1.job], now())
        Job.objects.process_job_output([recipe_job_1.job_id], now())

        # Create a new recipe type that has a new version of job 2 (job 1 is identical)
        new_job_type_2 = job_test_utils.create_job_type(name=job_type_2.name, version='New Version',
                                                        interface=job_type_2.manifest)
        new_definition = {
            'version': '1.0',
            'input_data': [{'name': 'Recipe Input', 'type': 'file', 'media_types': ['text/plain']}],
            'jobs': [
                {
                    'name': 'New Job 1',
                    'job_type': {'name': job_type_1.name, 'version': job_type_1.version},
                    'recipe_inputs': [{'recipe_input': 'Recipe Input', 'job_input': 'test-input-a'}],
                },
                {
                    'name': 'New Job 2',
                    'job_type': {'name': new_job_type_2.name, 'version': new_job_type_2.version},
                    'dependencies': [{
                        'name': 'New Job 1',
                        'connections': [{'output': 'test-output-a', 'input': 'Test Input 2'}],
                    }],
                },
            ],
        }
        new_recipe_type = recipe_test_utils.create_recipe_type(name=recipe_type.name, definition=new_definition)
        event = trigger_test_utils.create_trigger_event()

        # Re-read both nodes of the old recipe so the superseded jobs reflect the current database state
        recipe_job_1 = get_recipe_node(handler.recipe.id, 'Job 1')
        recipe_job_2 = get_recipe_node(handler.recipe.id, 'Job 2')
        superseded_jobs = {'Job 1': recipe_job_1.job, 'Job 2': recipe_job_2.job}
        old_graph = recipe_type.get_recipe_definition().get_graph()
        new_graph = new_recipe_type.get_recipe_definition().get_graph()
        delta = RecipeGraphDelta(old_graph, new_graph)

        # Queue new recipe that supersedes the old recipe
        new_handler = Queue.objects.queue_new_recipe(new_recipe_type, None, event, superseded_recipe=recipe,
                                                     delta=delta, superseded_jobs=superseded_jobs)

        # Ensure old recipe is superseded
        recipe = Recipe.objects.get(id=handler.recipe.id)
        self.assertTrue(recipe.is_superseded)

        # Ensure new recipe supersedes old recipe
        new_recipe = Recipe.objects.get(id=new_handler.recipe.id)
        self.assertEqual(new_recipe.superseded_recipe_id, handler.recipe.id)

        # Ensure that job 1 is already completed (it was copied from original recipe) and that job 2 is queued
        new_recipe_job_1 = get_recipe_node(new_handler.recipe.id, 'New Job 1')
        new_recipe_job_2 = get_recipe_node(new_handler.recipe.id, 'New Job 2')
        self.assertEqual(new_recipe_job_1.job.status, 'COMPLETED')
        self.assertFalse(new_recipe_job_1.is_original)
        self.assertEqual(new_recipe_job_2.job.status, 'QUEUED')
        self.assertTrue(new_recipe_job_2.is_original)