def test_successful_supersede_same_recipe_type(self):
    """Tests that RecipeManager.create_recipe() can supersede a recipe using the same recipe type, copying the
    untouched job and superseding the reprocessed one."""

    def recipe_job_for(recipe_id, job_name):
        # Look up a single recipe job (with its related job model) by recipe and job name
        return RecipeJob.objects.select_related('job').get(recipe_id=recipe_id, job_name=job_name)

    # Create the original recipe and grab its two jobs
    trigger = trigger_test_utils.create_trigger_event()
    old_handler = Recipe.objects.create_recipe(recipe_type=self.recipe_type, event=trigger,
                                               data=RecipeData(self.data))
    old_recipe_id = old_handler.recipe.id
    old_recipe = Recipe.objects.get(id=old_recipe_id)
    old_recipe_job_1 = recipe_job_for(old_recipe_id, 'Job 1')
    old_recipe_job_2 = recipe_job_for(old_recipe_id, 'Job 2')
    jobs_to_supersede = {
        'Job 1': old_recipe_job_1.job,
        'Job 2': old_recipe_job_2.job,
    }

    # Build a delta between identical graphs, then force Job 2 to be reprocessed
    same_graph = self.recipe_type.get_recipe_definition().get_graph()
    graph_delta = RecipeGraphDelta(same_graph, same_graph)
    graph_delta.reprocess_identical_node('Job 2')

    # Create the superseding recipe of the same type
    new_handler = Recipe.objects.create_recipe(recipe_type=self.recipe_type, event=trigger, data=None,
                                               superseded_recipe=old_recipe, delta=graph_delta,
                                               superseded_jobs=jobs_to_supersede)

    # Reload the old models: the recipe and Job 2 must be superseded, Job 1 is only copied
    old_recipe = Recipe.objects.get(id=old_recipe.id)
    old_job_1 = Job.objects.get(id=old_recipe_job_1.job_id)
    old_job_2 = Job.objects.get(id=old_recipe_job_2.job_id)
    self.assertTrue(old_recipe.is_superseded)
    self.assertFalse(old_job_1.is_superseded)
    self.assertTrue(old_job_2.is_superseded)

    # Load the new models
    new_recipe_id = new_handler.recipe.id
    new_recipe = Recipe.objects.get(id=new_recipe_id)
    copied = recipe_job_for(new_recipe_id, 'Job 1')
    reprocessed = recipe_job_for(new_recipe_id, 'Job 2')

    # The new recipe points back at the old one and carries the same data
    self.assertEqual(new_recipe.superseded_recipe_id, old_recipe.id)
    self.assertEqual(new_recipe.root_superseded_recipe_id, old_recipe.id)
    self.assertDictEqual(new_recipe.data, old_recipe.data)

    # Job 1 is the very same job, shared with the old recipe
    self.assertEqual(copied.job.id, old_job_1.id)
    self.assertFalse(copied.is_original)
    self.assertIsNone(copied.job.superseded_job)
    self.assertIsNone(copied.job.root_superseded_job)

    # Job 2 is a brand new job that supersedes the old Job 2
    self.assertNotEqual(reprocessed.job.id, old_job_2.id)
    self.assertTrue(reprocessed.is_original)
    self.assertEqual(reprocessed.job.superseded_job_id, old_job_2.id)
    self.assertEqual(reprocessed.job.root_superseded_job_id, old_job_2.id)
def test_successful_supersede_same_recipe_type(self):
    """Tests that RecipeManager.create_recipe() can supersede a recipe using the same recipe type, copying the
    untouched job and superseding the reprocessed one."""

    def recipe_job_for(recipe_id, job_name):
        # Look up a single recipe job (with its related job model) by recipe and job name
        return RecipeJob.objects.select_related('job').get(recipe_id=recipe_id, job_name=job_name)

    # Create the original recipe and grab its two jobs
    trigger = trigger_test_utils.create_trigger_event()
    old_handler = Recipe.objects.create_recipe(recipe_type=self.recipe_type, event=trigger,
                                               data=RecipeData(self.data))
    old_recipe_id = old_handler.recipe.id
    old_recipe = Recipe.objects.get(id=old_recipe_id)
    old_recipe_job_1 = recipe_job_for(old_recipe_id, 'Job 1')
    old_recipe_job_2 = recipe_job_for(old_recipe_id, 'Job 2')
    jobs_to_supersede = {
        'Job 1': old_recipe_job_1.job,
        'Job 2': old_recipe_job_2.job,
    }

    # Build a delta between identical graphs, then force Job 2 to be reprocessed
    same_graph = self.recipe_type.get_recipe_definition().get_graph()
    graph_delta = RecipeGraphDelta(same_graph, same_graph)
    graph_delta.reprocess_identical_node('Job 2')

    # Create the superseding recipe of the same type
    new_handler = Recipe.objects.create_recipe(recipe_type=self.recipe_type, event=trigger, data=None,
                                               superseded_recipe=old_recipe, delta=graph_delta,
                                               superseded_jobs=jobs_to_supersede)

    # Reload the old models: the recipe and Job 2 must be superseded, Job 1 is only copied
    old_recipe = Recipe.objects.get(id=old_recipe.id)
    old_job_1 = Job.objects.get(id=old_recipe_job_1.job_id)
    old_job_2 = Job.objects.get(id=old_recipe_job_2.job_id)
    self.assertTrue(old_recipe.is_superseded)
    self.assertFalse(old_job_1.is_superseded)
    self.assertTrue(old_job_2.is_superseded)

    # Load the new models
    new_recipe_id = new_handler.recipe.id
    new_recipe = Recipe.objects.get(id=new_recipe_id)
    copied = recipe_job_for(new_recipe_id, 'Job 1')
    reprocessed = recipe_job_for(new_recipe_id, 'Job 2')

    # The new recipe points back at the old one and carries the same data
    self.assertEqual(new_recipe.superseded_recipe_id, old_recipe.id)
    self.assertEqual(new_recipe.root_superseded_recipe_id, old_recipe.id)
    self.assertDictEqual(new_recipe.data, old_recipe.data)

    # Job 1 is the very same job, shared with the old recipe
    self.assertEqual(copied.job.id, old_job_1.id)
    self.assertFalse(copied.is_original)
    self.assertIsNone(copied.job.superseded_job)
    self.assertIsNone(copied.job.root_superseded_job)

    # Job 2 is a brand new job that supersedes the old Job 2
    self.assertNotEqual(reprocessed.job.id, old_job_2.id)
    self.assertTrue(reprocessed.is_original)
    self.assertEqual(reprocessed.job.superseded_job_id, old_job_2.id)
    self.assertEqual(reprocessed.job.root_superseded_job_id, old_job_2.id)
def test_reprocess_identical_node(self):
    """Tests that RecipeGraphDelta.reprocess_identical_node() moves an identical node (and its identical children)
    into the set of changed nodes"""

    # The factories below return fresh objects on every call so the two definitions never share any state

    def input_data():
        return [
            {'name': 'Recipe Input 1', 'type': 'file', 'media_types': ['text/plain']},
            {'name': 'Recipe Input 2', 'type': 'property'},
        ]

    def job_type_of(job):
        return {'name': job.job_type.name, 'version': job.job_type.version}

    def fed_by_recipe(job_name, job, recipe_input):
        # A job whose only input comes straight from a recipe input
        return {
            'name': job_name,
            'job_type': job_type_of(job),
            'recipe_inputs': [{'recipe_input': recipe_input, 'job_input': 'Job Input 1'}],
        }

    def fed_by_job(job_name, job, parent_name):
        # A job whose only input comes from the output of a parent job
        return {
            'name': job_name,
            'job_type': job_type_of(job),
            'dependencies': [{
                'name': parent_name,
                'connections': [{'output': 'Job Output 1', 'input': 'Job Input 1'}],
            }],
        }

    # Graph A: A and B read recipe inputs, C and D both hang off B
    definition_a = {
        'version': '1.0',
        'input_data': input_data(),
        'jobs': [
            fed_by_recipe('Job A', self.job_a, 'Recipe Input 1'),
            fed_by_recipe('Job B', self.job_b, 'Recipe Input 2'),
            fed_by_job('Job C', self.job_c, 'Job B'),
            fed_by_job('Job D', self.job_d, 'Job B'),
        ],
    }
    graph_a = RecipeDefinition(definition_a).get_graph()

    # Graph B: renamed jobs, no counterpart of C, and a new Job 5 hanging off Job 4
    definition_b = {
        'version': '1.0',
        'input_data': input_data(),
        'jobs': [
            fed_by_recipe('Job 1', self.job_a, 'Recipe Input 1'),
            fed_by_recipe('Job 2', self.job_b, 'Recipe Input 2'),
            fed_by_job('Job 4', self.job_d, 'Job 2'),
            fed_by_job('Job 5', self.job_a, 'Job 4'),
        ],
    }
    graph_b = RecipeDefinition(definition_b).get_graph()

    # Deleted and new nodes are expected to be the same before and after forcing the reprocess
    deleted_names = {'Job C'}
    new_names = {'Job 5'}

    # Initial delta: nothing changed, three nodes identical
    delta = RecipeGraphDelta(graph_a, graph_b)
    self.assertTrue(delta.can_be_reprocessed)
    self.assertDictEqual(delta.get_changed_nodes(), {})
    self.assertSetEqual(delta.get_deleted_nodes(), deleted_names)
    self.assertDictEqual(delta.get_identical_nodes(), {'Job 1': 'Job A', 'Job 2': 'Job B', 'Job 4': 'Job D'})
    self.assertSetEqual(delta.get_new_nodes(), new_names)

    # Force Job 2 to reprocess; its child Job 4 must follow it into the changed nodes
    delta.reprocess_identical_node('Job 2')
    self.assertTrue(delta.can_be_reprocessed)
    self.assertDictEqual(delta.get_changed_nodes(), {'Job 2': 'Job B', 'Job 4': 'Job D'})
    self.assertSetEqual(delta.get_deleted_nodes(), deleted_names)
    self.assertDictEqual(delta.get_identical_nodes(), {'Job 1': 'Job A'})
    self.assertSetEqual(delta.get_new_nodes(), new_names)
def reprocess_recipe(self, recipe_id, job_names=None, all_jobs=False):
    """Schedules an existing recipe for re-processing. All requested jobs, jobs that have changed in the latest
    revision, and any of their dependent jobs will be re-processed. All database changes occur in an atomic
    transaction. A recipe instance that is already superseded cannot be re-processed again.

    :param recipe_id: The identifier of the recipe to re-process.
    :type recipe_id: int
    :param job_names: A list of job names from the recipe that should be forced to re-process even if the latest
        recipe revision left them unchanged. If none are passed, then only jobs that changed are scheduled.
    :type job_names: [string]
    :param all_jobs: Indicates all jobs should be forced to re-process even if the latest recipe revision left them
        unchanged. This is a convenience for passing all the individual names in the job_names parameter and this
        parameter will override any values passed there.
    :type all_jobs: bool
    :returns: A handler for the new recipe
    :rtype: :class:`recipe.handlers.handler.RecipeHandler`

    :raises ReprocessError: If the recipe is already superseded, if the recipe type has not changed and no job
        names were given, or if the queue application cannot be imported
    """

    prev_recipe = Recipe.objects.select_related('recipe_type', 'recipe_type_rev').get(pk=recipe_id)

    # Superseded recipes cannot be reprocessed; reject them before doing any graph work
    if prev_recipe.is_superseded:
        raise ReprocessError('Unable to re-process a recipe that is already superseded')

    # Determine the old recipe graph
    prev_graph = prev_recipe.get_recipe_definition().get_graph()

    # Populate the list of all job names in the recipe as a shortcut
    # NOTE(review): these names come from the previous graph while reprocess_identical_node() appears to be keyed
    # by names in the current graph; equivalent as long as jobs are not renamed between revisions - confirm
    if all_jobs:
        job_names = prev_graph.get_topological_order()

    # Determine the current recipe graph
    current_type = prev_recipe.recipe_type
    current_graph = current_type.get_recipe_definition().get_graph()

    # Make sure that something is different to reprocess
    if current_type.revision_num == prev_recipe.recipe_type_rev.revision_num and not job_names:
        raise ReprocessError('Job names must be provided when the recipe type has not changed')

    # Compute the job differences between recipe revisions including forced ones
    graph_delta = RecipeGraphDelta(prev_graph, current_graph)
    if job_names:
        for job_name in job_names:
            graph_delta.reprocess_identical_node(job_name)

    # Get the old recipe jobs that will be superseded
    prev_recipe_jobs = RecipeJob.objects.filter(recipe=prev_recipe)

    # Acquire model locks
    superseded_recipe = Recipe.objects.select_for_update().get(pk=recipe_id)
    prev_jobs = Job.objects.select_for_update().filter(pk__in=[rj.job_id for rj in prev_recipe_jobs])
    prev_jobs_dict = {j.id: j for j in prev_jobs}
    superseded_jobs = {rj.job_name: prev_jobs_dict[rj.job_id] for rj in prev_recipe_jobs}

    # Create an event to represent this request
    description = {'user': '******'}
    event = TriggerEvent.objects.create_trigger_event('USER', None, description, timezone.now())

    # Only the import itself is guarded, so an ImportError raised while queuing the recipe is not misreported as
    # a missing queue application
    # NOTE(review): imported locally, presumably to avoid a circular import - confirm
    try:
        from queue.models import Queue
    except ImportError:
        raise ReprocessError('Unable to import from queue application')

    # Create the new recipe while superseding the old one and queuing the associated jobs
    return Queue.objects.queue_new_recipe(current_type, None, event, superseded_recipe, graph_delta,
                                          superseded_jobs)
def _handle_recipe_jobs(self, msg_already_run, recipes, new_revision_id, revisions, recipe_job_ids, job_names,
                        all_jobs, when):
    """Works out, for every new recipe, which jobs of the superseded recipe are copied, superseded or unpublished,
    and builds the follow-up messages for the superseded jobs

    :param msg_already_run: Whether the database transaction has already occurred
    :type msg_already_run: bool
    :param recipes: The new recipe models
    :type recipes: list
    :param new_revision_id: The ID of the new recipe type revision to use for reprocessing
    :type new_revision_id: int
    :param revisions: Recipe type revisions stored by revision ID
    :type revisions: dict
    :param recipe_job_ids: Dict where recipe ID maps to a dict where job_name maps to a list of job IDs
    :type recipe_job_ids: dict
    :param job_names: The job names within the recipes to force reprocess
    :type job_names: list
    :param all_jobs: If True then all jobs within the recipe should be reprocessed, False otherwise
    :type all_jobs: bool
    :param when: The time that the jobs were superseded
    :type when: :class:`datetime.datetime`
    :return: A list of messages that should be sent regarding the superseded jobs
    :rtype: list
    """

    # Database writes are skipped when the message has already run; the ID lists are still collected so that the
    # messages can be rebuilt
    write_to_db = not msg_already_run

    ids_to_supersede = []
    ids_to_unpublish = []
    pending_nodes = []
    copied_count = 0

    new_graph = revisions[new_revision_id].get_recipe_definition().get_graph()

    for recipe in recipes:
        # Job IDs of the superseded recipe, stored by job name
        old_job_ids = recipe_job_ids[recipe.superseded_recipe_id]
        # NOTE(review): the old graph is looked up through the revision of a recipe documented as "new" - confirm
        # that recipe_type_rev_id really refers to the old revision here
        old_graph = revisions[recipe.recipe_type_rev_id].get_recipe_definition().get_graph()
        if all_jobs:
            forced_names = old_graph.get_topological_order()
        else:
            forced_names = job_names

        # Diff the two revisions, forcing the requested jobs to count as changed
        delta = RecipeGraphDelta(old_graph, new_graph)
        for forced_name in forced_names:
            delta.reprocess_identical_node(forced_name)

        # Identical jobs are simply linked into the new recipe
        if write_to_db:
            for node_name in delta.get_identical_nodes():
                if node_name not in old_job_ids:
                    continue
                for job_id in old_job_ids[node_name]:
                    node = RecipeNode()
                    node.job_id = job_id
                    node.node_name = node_name
                    node.recipe_id = recipe.id
                    node.is_original = False
                    copied_count += 1
                    pending_nodes.append(node)
                    # Flush in batches to keep the pending list bounded
                    if len(pending_nodes) >= MODEL_BATCH_SIZE:
                        RecipeNode.objects.bulk_create(pending_nodes)
                        pending_nodes = []

        # Changed jobs are superseded
        for node_name in delta.get_changed_nodes():
            if node_name in old_job_ids:
                ids_to_supersede.extend(old_job_ids[node_name])

        # Deleted jobs are both superseded and unpublished
        for node_name in delta.get_deleted_nodes():
            if node_name in old_job_ids:
                removed_ids = old_job_ids[node_name]
                ids_to_supersede.extend(removed_ids)
                ids_to_unpublish.extend(removed_ids)

    # Flush whatever is left over from the last partial batch
    if pending_nodes and write_to_db:
        RecipeNode.objects.bulk_create(pending_nodes)
    logger.info('Copied %d job(s) to the new recipe(s)', copied_count)

    # Supersede the jobs that were not copied over to a new recipe
    if write_to_db:
        Job.objects.supersede_jobs(ids_to_supersede, when)
    logger.info('Superseded %d job(s)', len(ids_to_supersede))
    logger.info('Found %d job(s) that should be unpublished', len(ids_to_unpublish))

    # Cancel messages first, followed by the unpublish messages
    messages = create_cancel_jobs_messages(ids_to_supersede, when)
    messages.extend(create_unpublish_jobs_messages(ids_to_unpublish, when))
    return messages
def test_reprocess_identical_node(self):
    """Tests that RecipeGraphDelta.reprocess_identical_node() moves an identical node (and its identical children)
    into the set of changed nodes"""

    # The factories below return fresh objects on every call so the two definitions never share any state

    def input_data():
        return [
            {'name': 'Recipe Input 1', 'type': 'file', 'media_types': ['text/plain']},
            {'name': 'Recipe Input 2', 'type': 'property'},
        ]

    def job_type_of(job):
        return {'name': job.job_type.name, 'version': job.job_type.version}

    def fed_by_recipe(job_name, job, recipe_input):
        # A job whose only input comes straight from a recipe input
        return {
            'name': job_name,
            'job_type': job_type_of(job),
            'recipe_inputs': [{'recipe_input': recipe_input, 'job_input': 'Job Input 1'}],
        }

    def fed_by_job(job_name, job, parent_name):
        # A job whose only input comes from the output of a parent job
        return {
            'name': job_name,
            'job_type': job_type_of(job),
            'dependencies': [{
                'name': parent_name,
                'connections': [{'output': 'Job Output 1', 'input': 'Job Input 1'}],
            }],
        }

    # Graph A: A and B read recipe inputs, C and D both hang off B
    definition_a = {
        'version': '1.0',
        'input_data': input_data(),
        'jobs': [
            fed_by_recipe('Job A', self.job_a, 'Recipe Input 1'),
            fed_by_recipe('Job B', self.job_b, 'Recipe Input 2'),
            fed_by_job('Job C', self.job_c, 'Job B'),
            fed_by_job('Job D', self.job_d, 'Job B'),
        ],
    }
    graph_a = RecipeDefinition(definition_a).get_graph()

    # Graph B: renamed jobs, no counterpart of C, and a new Job 5 hanging off Job 4
    definition_b = {
        'version': '1.0',
        'input_data': input_data(),
        'jobs': [
            fed_by_recipe('Job 1', self.job_a, 'Recipe Input 1'),
            fed_by_recipe('Job 2', self.job_b, 'Recipe Input 2'),
            fed_by_job('Job 4', self.job_d, 'Job 2'),
            fed_by_job('Job 5', self.job_a, 'Job 4'),
        ],
    }
    graph_b = RecipeDefinition(definition_b).get_graph()

    # Deleted and new nodes are expected to be the same before and after forcing the reprocess
    deleted_names = {'Job C'}
    new_names = {'Job 5'}

    # Initial delta: nothing changed, three nodes identical
    delta = RecipeGraphDelta(graph_a, graph_b)
    self.assertTrue(delta.can_be_reprocessed)
    self.assertDictEqual(delta.get_changed_nodes(), {})
    self.assertSetEqual(delta.get_deleted_nodes(), deleted_names)
    self.assertDictEqual(delta.get_identical_nodes(), {'Job 1': 'Job A', 'Job 2': 'Job B', 'Job 4': 'Job D'})
    self.assertSetEqual(delta.get_new_nodes(), new_names)

    # Force Job 2 to reprocess; its child Job 4 must follow it into the changed nodes
    delta.reprocess_identical_node('Job 2')
    self.assertTrue(delta.can_be_reprocessed)
    self.assertDictEqual(delta.get_changed_nodes(), {'Job 2': 'Job B', 'Job 4': 'Job D'})
    self.assertSetEqual(delta.get_deleted_nodes(), deleted_names)
    self.assertDictEqual(delta.get_identical_nodes(), {'Job 1': 'Job A'})
    self.assertSetEqual(delta.get_new_nodes(), new_names)
class BatchDefinition(object):
    """Holds the definition of a batch along with the values derived from it during validation"""

    def __init__(self):
        """Creates an empty definition: no root batch, no forced job names and no derived values yet
        """

        self.root_batch_id = None
        self.job_names = []
        self.all_jobs = False

        # Derived fields, filled in by validate()
        self.estimated_recipes = 0
        self.prev_batch_diff = None

    def validate(self, batch):
        """Checks that the given batch is valid with respect to this definition and populates the derived
        attributes (estimated recipe total and, when a root batch is set, the diff against the previous batch).
        The given batch must have all of its related fields populated, though id and root_batch_id may be None.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`
        :returns: A list of warnings discovered during validation
        :rtype: list

        :raises :class:`batch.definition.exceptions.InvalidDefinition`: If the definition is invalid
        """

        if self.root_batch_id:
            self._diff_against_previous_batch(batch)

        self._estimate_recipe_total(batch)
        if self.estimated_recipes:
            # No warnings are produced by this validation
            return []

        raise InvalidDefinition('NO_RECIPES', 'Batch definition must result in creating at least one recipe')

    def _diff_against_previous_batch(self, batch):
        """Validates the given batch against the batch it supersedes and stores the recipe graph diff between the
        two, with the requested job names forced to be reprocessed.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`

        :raises :class:`batch.definition.exceptions.InvalidDefinition`: If the previous batch is incompatible
        """

        previous = batch.superseded_batch

        if batch.recipe_type_id != previous.recipe_type_id:
            raise InvalidDefinition('MISMATCHED_RECIPE_TYPE',
                                    'New batch and previous batch must have the same recipe type')
        if not previous.is_creation_done:
            raise InvalidDefinition('PREV_BATCH_STILL_CREATING',
                                    'Previous batch must have completed creating all of its recipes')

        # Diff the previous batch's recipe graph against the new one
        new_graph = batch.recipe_type_rev.get_recipe_definition().get_graph()
        old_graph = previous.recipe_type_rev.get_recipe_definition().get_graph()
        diff = RecipeGraphDelta(old_graph, new_graph)
        self.prev_batch_diff = diff

        # Requesting all jobs is shorthand for naming every job in the new graph
        if self.all_jobs:
            self.job_names = new_graph.get_topological_order()

        for forced_name in self.job_names or []:
            diff.reprocess_identical_node(forced_name)

        if not diff.can_be_reprocessed:
            raise InvalidDefinition('PREV_BATCH_NO_REPROCESS', 'Previous batch cannot be reprocessed')

    def _estimate_recipe_total(self, batch):
        """Stores an estimate of how many recipes the given batch will create. The given batch must have all of its
        related fields populated, though id and root_batch_id may be None.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`
        """

        self.estimated_recipes = 0

        previous = batch.superseded_batch
        if not previous:
            return

        # Count the recipes of the superseded batch
        self.estimated_recipes += previous.recipes_total