def _create_subrecipes(self):
    """Creates the recipe models for a sub-recipe message

    One recipe model is built for every entry in self.sub_recipes, the models and their recipe nodes are
    bulk-created, and each sub-recipe's process_input value is stored in self._process_input keyed by the new
    recipe's ID. When self.superseded_recipe_id is set, a recipe diff is appended to self._recipe_diffs for every new
    sub-recipe that supersedes a previous sub-recipe.

    :returns: The list of recipe models created
    :rtype: :func:`list`
    """

    sub_recipes = {}  # {Node name: recipe model}

    superseded_sub_recipes = {}
    revision_ids = []
    # Get superseded sub-recipes from superseded recipe
    if self.superseded_recipe_id:
        superseded_sub_recipes = RecipeNode.objects.get_subrecipes(self.superseded_recipe_id)
        revision_ids = [r.recipe_type_rev_id for r in superseded_sub_recipes.values()]

    # Get recipe type revisions (both the superseded revisions by ID and the new revisions by name/number)
    revision_tuples = [(sub.recipe_type_name, sub.recipe_type_rev_num) for sub in self.sub_recipes]
    revs_by_id = RecipeTypeRevision.objects.get_revision_map(revision_ids, revision_tuples)
    revs_by_tuple = {(rev.recipe_type.name, rev.revision_num): rev for rev in revs_by_id.values()}

    # Create new recipe models
    for sub_recipe in self.sub_recipes:
        node_name = sub_recipe.node_name
        revision = revs_by_tuple[(sub_recipe.recipe_type_name, sub_recipe.recipe_type_rev_num)]
        # A node that does not exist in the superseded recipe has nothing to supersede
        superseded_recipe = superseded_sub_recipes.get(node_name)
        recipe = Recipe.objects.create_recipe_v6(revision, self.event_id, root_recipe_id=self.root_recipe_id,
                                                 recipe_id=self.recipe_id, batch_id=self.batch_id,
                                                 superseded_recipe=superseded_recipe)
        sub_recipes[node_name] = recipe

    new_recipes = list(sub_recipes.values())
    Recipe.objects.bulk_create(new_recipes)
    logger.info('Created %d recipe(s)', len(sub_recipes))

    # Create recipe nodes
    recipe_nodes = RecipeNode.objects.create_subrecipe_nodes(self.recipe_id, sub_recipes)
    RecipeNode.objects.bulk_create(recipe_nodes)

    # Set up process input dict
    for sub_recipe in self.sub_recipes:
        recipe = sub_recipes[sub_recipe.node_name]
        self._process_input[recipe.id] = sub_recipe.process_input

    # Set up recipe diffs
    if self.superseded_recipe_id:
        for node_name, recipe in sub_recipes.items():
            superseded_recipe = recipe.superseded_recipe
            if superseded_recipe is None:
                # This node was created without a superseded sub-recipe (see the creation loop above), so there is
                # no previous revision to diff against
                continue
            pair = _RecipePair(superseded_recipe, recipe)
            old_revision = revs_by_id[superseded_recipe.recipe_type_rev_id]
            new_revision = revs_by_tuple[(recipe.recipe_type.name, recipe.recipe_type_rev.revision_num)]
            diff = RecipeDiff(old_revision.get_definition(), new_revision.get_definition())
            if self.forced_nodes:
                sub_forced_nodes = self.forced_nodes.get_forced_nodes_for_subrecipe(node_name)
                if sub_forced_nodes:
                    diff.set_force_reprocess(sub_forced_nodes)
            self._recipe_diffs.append(_RecipeDiff(diff, [pair]))

    return new_recipes
def _find_existing_recipes(self):
    """Searches to determine if this message already ran and the new recipes already exist

    For a reprocess message, the recipes are looked up by root superseded recipe and event, and one recipe diff is
    appended to self._recipe_diffs per distinct (old revision, new revision) pair. For a sub-recipe message, the
    recipes are looked up through the recipe nodes of self.recipe_id, self._process_input is populated for each
    found recipe, and one recipe diff is appended per sub-recipe that supersedes a previous recipe.

    :returns: The list of recipe models found
    :rtype: :func:`list`
    """

    recipes = []

    if self.create_recipes_type == REPROCESS_TYPE:
        qry = Recipe.objects.select_related('superseded_recipe')
        recipes = qry.filter(root_superseded_recipe_id__in=self.root_recipe_ids, event_id=self.event_id)

        # Create recipe diffs
        rev_ids = [recipe.recipe_type_rev_id for recipe in recipes]
        rev_ids.extend([recipe.superseded_recipe.recipe_type_rev_id for recipe in recipes])
        revs = RecipeTypeRevision.objects.get_revision_map(rev_ids, [])

        # Group the recipe pairs so that each (old revision ID, new revision ID) combination is diffed only once
        pair_dict = {}
        for recipe in recipes:
            rev_tuple = (recipe.superseded_recipe.recipe_type_rev_id, recipe.recipe_type_rev_id)
            pair_dict.setdefault(rev_tuple, []).append(_RecipePair(recipe.superseded_recipe, recipe))

        for pair_tuple, pairs in pair_dict.items():
            old_revision = revs[pair_tuple[0]]
            new_revision = revs[pair_tuple[1]]
            diff = RecipeDiff(old_revision.get_definition(), new_revision.get_definition())
            if self.forced_nodes:
                diff.set_force_reprocess(self.forced_nodes)
            self._recipe_diffs.append(_RecipeDiff(diff, pairs))
    elif self.create_recipes_type == SUB_RECIPE_TYPE:
        node_names = [sub.node_name for sub in self.sub_recipes]
        qry = RecipeNode.objects.select_related('sub_recipe__superseded_recipe')
        qry = qry.filter(recipe_id=self.recipe_id, node_name__in=node_names, sub_recipe__event_id=self.event_id)
        recipes_by_node = {rn.node_name: rn.sub_recipe for rn in qry}
        recipes = list(recipes_by_node.values())

        if recipes_by_node:
            # Set up process input dict
            for sub_recipe in self.sub_recipes:
                recipe = recipes_by_node[sub_recipe.node_name]
                self._process_input[recipe.id] = sub_recipe.process_input

            # Only sub-recipes that supersede a previous recipe can be diffed. Decide this per recipe rather than by
            # looking at a single arbitrary element, since dict ordering says nothing about the other recipes.
            superseding_by_node = {}
            for node_name, recipe in recipes_by_node.items():
                if recipe.superseded_recipe:
                    superseding_by_node[node_name] = recipe

            if superseding_by_node:
                # Set up recipe diffs
                rev_ids = [recipe.recipe_type_rev_id for recipe in superseding_by_node.values()]
                rev_ids.extend([recipe.superseded_recipe.recipe_type_rev_id
                                for recipe in superseding_by_node.values()])
                revs = RecipeTypeRevision.objects.get_revision_map(rev_ids, [])
                for node_name, recipe in superseding_by_node.items():
                    pair = _RecipePair(recipe.superseded_recipe, recipe)
                    old_revision = revs[recipe.superseded_recipe.recipe_type_rev_id]
                    new_revision = revs[recipe.recipe_type_rev_id]
                    diff = RecipeDiff(old_revision.get_definition(), new_revision.get_definition())
                    if self.forced_nodes:
                        sub_forced_nodes = self.forced_nodes.get_forced_nodes_for_subrecipe(node_name)
                        if sub_forced_nodes:
                            diff.set_force_reprocess(sub_forced_nodes)
                    self._recipe_diffs.append(_RecipeDiff(diff, [pair]))

    return recipes
class BatchDefinition(object):
    """Holds the settings that define a batch, plus fields derived from them during validation"""

    def __init__(self):
        """Creates a definition with no dataset, no previous batch and no forced nodes
        """

        # User-provided settings
        self.dataset = None
        self.supersedes = True
        self.root_batch_id = None
        self.forced_nodes = None

        # Fields populated by validate()
        self.estimated_recipes = 0
        self.prev_batch_diff = None

    def validate(self, batch):
        """Checks that the given batch is valid for this definition and populates the derived fields. The batch must
        have its related fields populated, although id and root_batch_id may be None.

        When root_batch_id is set, the batch is checked against its superseded batch and prev_batch_diff is
        populated. Otherwise, when a dataset is set, the dataset parameters are checked against the recipe input
        interface. In every case the estimated recipe total is recalculated.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`
        :returns: A list of warnings discovered during validation
        :rtype: list

        :raises :class:`batch.definition.exceptions.InvalidDefinition`: If the definition is invalid
        """

        if self.root_batch_id:
            # This batch re-processes a previous batch
            previous_batch = batch.superseded_batch
            if batch.recipe_type_id != previous_batch.recipe_type_id:
                raise InvalidDefinition('MISMATCHED_RECIPE_TYPE',
                                        'New batch and previous batch must have the same recipe type')
            if not previous_batch.is_creation_done:
                raise InvalidDefinition('PREV_BATCH_STILL_CREATING',
                                        'Previous batch must have completed creating all of its recipes')

            # Diff the new recipe definition against the one used by the previous batch
            new_definition = batch.recipe_type_rev.get_definition()
            old_definition = previous_batch.recipe_type_rev.get_definition()
            batch_diff = RecipeDiff(old_definition, new_definition)
            self.prev_batch_diff = batch_diff
            if self.forced_nodes:
                batch_diff.set_force_reprocess(self.forced_nodes)
            if not batch_diff.can_be_reprocessed:
                raise InvalidDefinition('PREV_BATCH_NO_REPROCESS', 'Previous batch cannot be reprocessed')
        elif self.dataset:
            # This is a new batch, so the dataset parameters must connect to the recipe inputs
            from data.interface.exceptions import InvalidInterfaceConnection
            from data.models import DataSet
            from recipe.models import RecipeTypeRevision

            dataset_definition = DataSet.objects.get(pk=self.dataset).get_definition()
            revision = RecipeTypeRevision.objects.get_revision(name=batch.recipe_type.name,
                                                               revision_num=batch.recipe_type_rev.revision_num)
            # NOTE(review): the definition validated below is taken from the revision's recipe_type, not from the
            # revision itself - confirm this is intended
            recipe_type = revision.recipe_type

            # Fold the dataset's own parameters into its global parameters
            dataset_parameters = dataset_definition.global_parameters
            parameters_by_name = dataset_definition.parameters.parameters
            for name in parameters_by_name:
                dataset_parameters.add_parameter(parameters_by_name[name])

            try:
                input_interface = recipe_type.get_definition().input_interface
                input_interface.validate_connection(dataset_parameters)
            except InvalidInterfaceConnection as ex:
                message = 'No parameters in the dataset match the recipe type inputs. %s' % unicode(ex)
                raise InvalidDefinition('MISMATCHED_PARAMS', message)

        self._estimate_recipe_total(batch)
        if self.estimated_recipes:
            return []

        raise InvalidDefinition('NO_RECIPES', 'Batch definition must result in creating at least one recipe')

    def _estimate_recipe_total(self, batch):
        """Recalculates estimated_recipes, the number of recipes expected to be created for the given batch. The
        batch must have its related fields populated, although id and root_batch_id may be None.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`
        """

        from batch.models import Batch

        # Reset first: this definition is handed to the calculation with a zeroed estimate
        self.estimated_recipes = 0
        self.estimated_recipes += Batch.objects.calculate_estimated_recipes(batch, self)
class BatchDefinition(object):
    """Holds the settings that define a batch, plus fields derived from them during validation"""

    def __init__(self):
        """Creates a definition with no previous batch and no forced nodes
        """

        # User-provided settings
        self.root_batch_id = None
        self.forced_nodes = None

        # Fields populated by validate()
        self.estimated_recipes = 0
        self.prev_batch_diff = None

    def validate(self, batch):
        """Checks that the given batch is valid for this definition and populates the derived fields (estimated
        recipe total and previous batch diff). The batch must have its related fields populated, although id and
        root_batch_id may be None.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`
        :returns: A list of warnings discovered during validation
        :rtype: list

        :raises :class:`batch.definition.exceptions.InvalidDefinition`: If the definition is invalid
        """

        if self.root_batch_id:
            previous_batch = batch.superseded_batch
            if batch.recipe_type_id != previous_batch.recipe_type_id:
                raise InvalidDefinition('MISMATCHED_RECIPE_TYPE',
                                        'New batch and previous batch must have the same recipe type')
            if not previous_batch.is_creation_done:
                raise InvalidDefinition('PREV_BATCH_STILL_CREATING',
                                        'Previous batch must have completed creating all of its recipes')

            # Diff the new recipe definition against the one used by the previous batch
            new_definition = batch.recipe_type_rev.get_definition()
            old_definition = previous_batch.recipe_type_rev.get_definition()
            batch_diff = RecipeDiff(old_definition, new_definition)
            self.prev_batch_diff = batch_diff
            if self.forced_nodes:
                batch_diff.set_force_reprocess(self.forced_nodes)
            if not batch_diff.can_be_reprocessed:
                raise InvalidDefinition('PREV_BATCH_NO_REPROCESS', 'Previous batch cannot be reprocessed')

        self._estimate_recipe_total(batch)
        if self.estimated_recipes:
            return []

        raise InvalidDefinition('NO_RECIPES', 'Batch definition must result in creating at least one recipe')

    def _estimate_recipe_total(self, batch):
        """Recalculates estimated_recipes from the recipe total of the superseded batch, if there is one. The batch
        must have its related fields populated, although id and root_batch_id may be None.

        :param batch: The batch model
        :type batch: :class:`batch.models.Batch`
        """

        self.estimated_recipes = 0

        previous_batch = batch.superseded_batch
        if previous_batch:
            self.estimated_recipes += previous_batch.recipes_total
def test_set_force_reprocess(self):
    """Tests calling RecipeDiff.set_force_reprocess() on two identical definitions, forcing job node C and the
    nodes '1' and '2' within sub-recipe node D"""

    def build_definition():
        """Builds a fresh recipe definition (with its own interface); both sides of the diff use this layout"""
        interface = Interface()
        interface.add_parameter(FileParameter('file_param_1', ['image/gif']))
        interface.add_parameter(JsonParameter('json_param_1', 'object'))

        definition = RecipeDefinition(interface)
        definition.add_job_node('A', 'job_type_1', '1.0', 1)
        definition.add_job_node('B', 'job_type_2', '2.0', 1)
        definition.add_job_node('C', 'job_type_3', '1.0', 2)
        definition.add_recipe_node('D', 'recipe_type_1', 1)
        definition.add_job_node('E', 'job_type_4', '1.0', 1)
        for parent_name, child_name in (('A', 'B'), ('A', 'C'), ('C', 'D'), ('C', 'E')):
            definition.add_dependency(parent_name, child_name)
        definition.add_recipe_input_connection('A', 'input_1', 'file_param_1')
        definition.add_dependency_input_connection('B', 'b_input_1', 'A', 'a_output_1')
        definition.add_dependency_input_connection('C', 'c_input_1', 'A', 'a_output_2')
        definition.add_dependency_input_connection('D', 'd_input_1', 'C', 'c_output_1')
        definition.add_recipe_input_connection('D', 'd_input_2', 'json_param_1')
        definition.add_dependency_input_connection('E', 'e_input_1', 'C', 'c_output_1')
        return definition

    # The new definition has no changes compared to the old one
    old_definition = build_definition()
    new_definition = build_definition()

    forced_in_d = ForcedNodes()
    for forced_name in ('1', '2'):
        forced_in_d.add_node(forced_name)
    forced_at_top = ForcedNodes()
    forced_at_top.add_node('C')
    forced_at_top.add_subrecipe('D', forced_in_d)

    diff = RecipeDiff(old_definition, new_definition)
    diff.set_force_reprocess(forced_at_top)

    # No recipe input changes so recipe can be reprocessed
    self.assertTrue(diff.can_be_reprocessed)
    self.assertListEqual(diff.reasons, [])

    def check_unchanged_node(node_name, expect_reprocess):
        """Asserts the node is unchanged with no listed changes and the expected reprocess flag, then returns it"""
        node = diff.graph[node_name]
        self.assertEqual(node.status, NodeDiff.UNCHANGED)
        if expect_reprocess:
            self.assertTrue(node.reprocess_new_node)
        else:
            self.assertFalse(node.reprocess_new_node)
        self.assertListEqual(node.changes, [])
        return node

    # Check each node for correct fields
    check_unchanged_node('A', False)
    check_unchanged_node('B', False)
    check_unchanged_node('C', True)  # Force reprocess
    node_d = check_unchanged_node('D', True)  # Force reprocess
    # Check forced nodes object that got passed to recipe node D
    self.assertEqual(node_d.force_reprocess_nodes, forced_in_d)
    check_unchanged_node('E', True)  # Force reprocess due to C being forced

    # Check nodes to copy, supersede, and unpublish
    copied_names = set(diff.get_nodes_to_copy().keys())
    self.assertSetEqual(copied_names, {'A', 'B'})
    superseded_names = set(diff.get_nodes_to_supersede().keys())
    self.assertSetEqual(superseded_names, {'C', 'D', 'E'})
    unpublished_names = set(diff.get_nodes_to_unpublish().keys())
    self.assertSetEqual(unpublished_names, set())