def test_validate(self):
    """Tests calling Data.validate()"""

    interface = Interface()
    data = Data()

    interface.add_parameter(FileParameter('input_1', ['application/json']))
    interface.add_parameter(JsonParameter('input_2', 'integer'))
    data.add_value(FileValue('input_1', [123]))
    data.add_value(JsonValue('input_2', 100))
    data.add_value(JsonValue('extra_input_1', 'hello'))
    data.add_value(JsonValue('extra_input_2', 'there'))

    # Valid data
    data.validate(interface)
    # Ensure extra data values are removed
    self.assertSetEqual(set(data.values.keys()), {'input_1', 'input_2'})

    # Data is missing required input 3
    interface.add_parameter(FileParameter('input_3', ['image/gif'], required=True))
    with self.assertRaises(InvalidData) as context:
        data.validate(interface)
    self.assertEqual(context.exception.error.name, 'PARAM_REQUIRED')
    data.add_value(FileValue('input_3', [999]))  # Input 3 taken care of now

    # Invalid data
    interface.add_parameter(JsonParameter('input_4', 'string'))
    mock_value = MagicMock()
    mock_value.name = 'input_4'
    mock_value.validate.side_effect = InvalidData('MOCK', '')
    data.add_value(mock_value)
    with self.assertRaises(InvalidData) as context:
        data.validate(interface)
    self.assertEqual(context.exception.error.name, 'MOCK')
def test_is_data_accepted(self):
    """Tests calling DataFilter.is_data_accepted()"""

    data_filter = DataFilter(all=False)
    data_filter.add_filter({'name': 'input_a', 'type': 'media-type', 'condition': '==',
                            'values': ['application/json']})
    data_filter.add_filter({'name': 'input_b', 'type': 'string', 'condition': 'contains',
                            'values': ['abcde']})
    data_filter.add_filter({'name': 'input_c', 'type': 'integer', 'condition': '>',
                            'values': ['0']})
    data_filter.add_filter({'name': 'input_d', 'type': 'integer', 'condition': 'between',
                            'values': ['0', '100']})
    data_filter.add_filter({'name': 'input_f', 'type': 'meta-data', 'condition': 'in',
                            'values': [['foo', 'baz']], 'fields': [['a', 'b']]})

    data = Data()
    file_value = FileValue('input_a', [self.file1.id])
    data.add_value(file_value)

    # First filter passes, so data is accepted if all is set to False
    self.assertTrue(data_filter.is_data_accepted(data))

    # The other filters fail, so data is not accepted when all is True
    data_filter.all = True
    self.assertFalse(data_filter.is_data_accepted(data))

    # Get the other filters to pass
    json_value = JsonValue('input_b', 'abcdefg')
    data.add_value(json_value)
    json_value = JsonValue('input_c', '10')
    data.add_value(json_value)
    json_value = JsonValue('input_d', 50)
    data.add_value(json_value)
    file_value = FileValue('input_f', [self.file2.id])
    data.add_value(file_value)
    self.assertTrue(data_filter.is_data_accepted(data))
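# A minimal sketch (not part of the suite) of the 'all' flag semantics exercised
# above: with all=False the filter accepts data as soon as any one condition
# passes (an OR across filters), while all=True requires every condition to pass
# (an AND), and a missing input fails its filter. The filter dicts and values
# below are copied from the test.
def _sketch_data_filter_all_flag():
    data_filter = DataFilter(all=False)
    data_filter.add_filter({'name': 'input_c', 'type': 'integer', 'condition': '>',
                            'values': ['0']})
    data_filter.add_filter({'name': 'input_d', 'type': 'integer', 'condition': 'between',
                            'values': ['0', '100']})

    data = Data()
    data.add_value(JsonValue('input_c', '10'))  # satisfies the '>' condition

    assert data_filter.is_data_accepted(data)       # OR: one passing filter is enough
    data_filter.all = True
    assert not data_filter.is_data_accepted(data)   # AND: the 'input_d' filter fails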
def test_convert_data_to_v1_json(self):
    """Tests calling convert_data_to_v1_json()"""

    # Try data and interface with nothing set
    data = Data()
    interface = Interface()
    json = convert_data_to_v1_json(data, interface)
    DataV1(data=json.get_dict())  # Revalidate

    # Try data with a variety of values
    data = Data()
    data.add_value(FileValue('input_a', [1234]))
    data.add_value(FileValue('input_b', [1235, 1236]))
    data.add_value(JsonValue('input_c', 'hello'))
    data.add_value(JsonValue('input_d', 11.9))
    json = convert_data_to_v1_json(data, interface)
    self.assertDictEqual(json.get_dict(), {
        u'input_data': [
            {u'name': u'input_d', u'value': 11.9},
            {u'name': u'input_b', u'file_ids': [1235, 1236]},
            {u'name': u'input_c', u'value': u'hello'},
            {u'name': u'input_a', u'file_id': 1234}
        ],
        u'version': u'1.0'
    })
    DataV1(data=json.get_dict())  # Revalidate
    self.assertSetEqual(set(DataV6(json.get_dict()).get_data().values.keys()),
                        {'input_a', 'input_b', 'input_c', 'input_d'})

    # Try data with a single file list that should be a directory
    data = Data()
    data.add_value(FileValue('input_a', [1234]))
    interface = Interface()
    file_param = FileParameter('input_a', [], True, True)
    interface.add_parameter(file_param)
    json = convert_data_to_v1_json(data, interface)
    self.assertDictEqual(json.get_dict(), {
        u'input_data': [{u'name': u'input_a', u'file_ids': [1234]}],
        u'version': u'1.0'
    })
def test_generate_node_input_data(self):
    """Tests calling RecipeDefinition.generate_node_input_data()"""

    input_interface = Interface()
    input_interface.add_parameter(FileParameter('recipe_input_1', ['image/gif'], multiple=True))
    input_interface.add_parameter(JsonParameter('recipe_input_2', 'string'))

    definition = RecipeDefinition(input_interface)
    definition.add_job_node('node_a', 'job_type_1', '1.0', 1)
    definition.add_job_node('node_b', 'job_type_2', '1.0', 1)
    definition.add_job_node('node_c', 'job_type_3', '1.0', 1)
    definition.add_dependency('node_c', 'node_b')
    definition.add_dependency('node_c', 'node_a')
    definition.add_recipe_input_connection('node_c', 'input_1', 'recipe_input_1')
    definition.add_recipe_input_connection('node_c', 'input_2', 'recipe_input_2')
    definition.add_dependency_input_connection('node_c', 'input_3', 'node_a', 'output_a_1')
    definition.add_dependency_input_connection('node_c', 'input_4', 'node_a', 'output_a_2')
    definition.add_dependency_input_connection('node_c', 'input_5', 'node_b', 'output_b_1')

    recipe_data = Data()
    recipe_data.add_value(FileValue('recipe_input_1', [1, 2, 3, 4, 5]))
    recipe_data.add_value(JsonValue('recipe_input_2', 'Scale is awesome!'))
    a_output_data = Data()
    a_output_data.add_value(FileValue('output_a_1', [1234]))
    a_output_data.add_value(JsonValue('output_a_2', {'foo': 'bar'}))
    b_output_data = Data()
    b_output_data.add_value(JsonValue('output_b_1', 12.34))
    node_outputs = {
        'node_a': RecipeNodeOutput('node_a', 'job', 1, a_output_data),
        'node_b': RecipeNodeOutput('node_b', 'job', 1, b_output_data)
    }

    node_data = definition.generate_node_input_data('node_c', recipe_data, node_outputs)
    self.assertSetEqual(set(node_data.values.keys()),
                        {'input_1', 'input_2', 'input_3', 'input_4', 'input_5'})
    self.assertListEqual(node_data.values['input_1'].file_ids, [1, 2, 3, 4, 5])
    self.assertEqual(node_data.values['input_2'].value, 'Scale is awesome!')
    self.assertListEqual(node_data.values['input_3'].file_ids, [1234])
    self.assertDictEqual(node_data.values['input_4'].value, {'foo': 'bar'})
    self.assertEqual(node_data.values['input_5'].value, 12.34)
def process_manual_ingested_source_file(self, ingest_id, source_file, when, recipe_type_id):
    """Processes a manual ingest where a strike or scan is not involved. All database
    changes are made in an atomic transaction.

    :param ingest_id: The ID of the ingest
    :type ingest_id: int
    :param source_file: The source file that was ingested
    :type source_file: :class:`source.models.SourceFile`
    :param when: When the source file was ingested
    :type when: :class:`datetime.datetime`
    :param recipe_type_id: The ID of the recipe type to kick off
    :type recipe_type_id: int
    """

    recipe_type = RecipeType.objects.get(id=recipe_type_id)

    if recipe_type and recipe_type.is_active:
        recipe_data = Data()
        input_name = recipe_type.get_definition().get_input_keys()[0]
        recipe_data.add_value(FileValue(input_name, [source_file.id]))
        event = self._create_trigger_event(None, source_file, when)
        ingest_event = self._create_ingest_event(ingest_id, None, source_file, when)

        logger.info('Queueing new recipe of type %s %s', recipe_type.name, recipe_type.revision_num)
        Queue.objects.queue_new_recipe_v6(recipe_type, recipe_data, event, ingest_event)
    else:
        logger.info('No recipe type found for id %s or recipe type is inactive', recipe_type_id)
def test_convert_definition_to_v6_json(self):
    """Tests calling convert_definition_to_v6_json()"""

    # Try definition with nothing set
    definition = DataSetDefinitionV6()
    json = convert_definition_to_v6_json(definition.get_definition())
    DataSetDefinitionV6(definition=json.get_dict(), do_validate=True)  # Revalidate

    # Try definition with a variety of values
    definition = DataSetDefinition(definition={})
    file_param = FileParameter('input_a', ['application/json'])
    json_param = JsonParameter('input_b', 'integer')
    file_param2 = FileParameter('input_c', ['application/json'])
    json_param2 = JsonParameter('input_d', 'integer')
    definition.add_global_parameter(file_param)
    definition.add_global_parameter(json_param)
    definition.add_global_value(FileValue('input_a', [123]))
    definition.add_global_value(JsonValue('input_b', 100))
    definition.add_parameter(file_param2)
    definition.add_parameter(json_param2)
    json = convert_definition_to_v6_json(definition)
    DataSetDefinitionV6(definition=json.get_dict(), do_validate=True)  # Revalidate
    self.assertSetEqual(set(json.get_definition().get_parameters()),
                        {'input_a', 'input_b', 'input_c', 'input_d'})
def add_file_input(self, name, file_id):
    """Adds a new file parameter to this job data.

    :param name: The file parameter name
    :type name: string
    :param file_id: The ID of the file
    :type file_id: long
    """

    self._new_data.add_value(FileValue(name, [file_id]))
def test_convert_data_to_v6_json(self):
    """Tests calling convert_data_to_v6_json()"""

    # Try data with nothing set
    data = Data()
    json = convert_data_to_v6_json(data)
    DataV6(data=json.get_dict(), do_validate=True)  # Revalidate

    # Try data with a variety of values
    data = Data()
    data.add_value(FileValue('input_a', [1234]))
    data.add_value(FileValue('input_b', [1235, 1236]))
    data.add_value(JsonValue('input_c', 'hello'))
    data.add_value(JsonValue('input_d', 11.9))
    json = convert_data_to_v6_json(data)
    DataV6(data=json.get_dict(), do_validate=True)  # Revalidate
    self.assertSetEqual(set(json.get_data().values.keys()),
                        {'input_a', 'input_b', 'input_c', 'input_d'})
def test_add_value(self):
    """Tests calling Data.add_value()"""

    data = Data()

    file_value = FileValue('input_1', [123])
    data.add_value(file_value)
    json_value = JsonValue('input_2', {'foo': 'bar'})
    data.add_value(json_value)
    self.assertSetEqual(set(data.values.keys()), {'input_1', 'input_2'})

    # Duplicate value
    dup_value = FileValue('input_1', [123])
    with self.assertRaises(InvalidData) as context:
        data.add_value(dup_value)
    self.assertEqual(context.exception.error.name, 'DUPLICATE_VALUE')
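# A minimal usage sketch (assuming the Data/FileValue/InvalidData imports already
# used by the tests above): add_value() raises InvalidData with error name
# 'DUPLICATE_VALUE' when a parameter name is re-used, so callers that may re-add
# a value can guard with try/except rather than pre-checking data.values.
def _sketch_add_value_duplicate_guard():
    data = Data()
    data.add_value(FileValue('input_1', [123]))
    try:
        data.add_value(FileValue('input_1', [123]))  # same name -> DUPLICATE_VALUE
    except InvalidData:
        pass  # the value is already present; keep the existing one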
def add_file_parameter(self, name, file_id):
    """Adds a file to the job results

    :param name: The output parameter name
    :type name: string
    :param file_id: The file ID
    :type file_id: long
    """

    self._results_data.add_value(FileValue(name, [file_id]))
def add_file_list_input(self, name, file_ids):
    """Adds a new files parameter to this job data.

    :param name: The files parameter name
    :type name: string
    :param file_ids: The IDs of the files
    :type file_ids: [long]
    """

    self._new_data.add_value(FileValue(name, file_ids))
def add_file_list_parameter(self, name, file_ids):
    """Adds a list of files to the job results

    :param name: The output parameter name
    :type name: string
    :param file_ids: The file IDs
    :type file_ids: [long]
    """

    self._results_data.add_value(FileValue(name, file_ids))
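# Equivalence sketch for the single-file and file-list helpers above: both
# ultimately store a FileValue, so the single-file variant is just the
# one-element list case. Illustrative only, written against the underlying
# Data/FileValue classes rather than the holder objects.
def _sketch_file_value_helpers():
    results_data = Data()
    results_data.add_value(FileValue('output_a', [1234]))  # add_file_parameter('output_a', 1234)
    results_data.add_value(FileValue('output_b', [1, 2]))  # add_file_list_parameter('output_b', [1, 2])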
def process_ingested_source_file_cm(self, ingest_id, source, source_file, when):
    """Processes the given ingested source file by kicking off its recipe. All database
    changes are made in an atomic transaction.

    :param ingest_id: The ID of the ingest
    :type ingest_id: int
    :param source: The strike or scan that triggered the ingest
    :type source: `object`
    :param source_file: The source file that was ingested
    :type source_file: :class:`source.models.SourceFile`
    :param when: When the source file was ingested
    :type when: :class:`datetime.datetime`
    """

    # Get the recipe type associated with the ingest strike/scan
    source_recipe_config = source.configuration['recipe']
    recipe_name = source_recipe_config['name']
    recipe_revision = source_recipe_config.get('revision_num')

    recipe_type = RecipeType.objects.get(name=recipe_name)
    if recipe_revision:
        recipe_type = RecipeTypeRevision.objects.get_revision(recipe_name, recipe_revision).recipe_type

    if len(recipe_type.get_definition().get_input_keys()) == 0:
        logger.info('No inputs defined for recipe %s. Recipe will not be run.', recipe_name)
        return

    if recipe_type and recipe_type.is_active:
        # Assuming one input per recipe, so pull the first defined input we find
        recipe_data = Data()
        input_name = recipe_type.get_definition().get_input_keys()[0]
        recipe_data.add_value(FileValue(input_name, [source_file.id]))
        event = self._create_trigger_event(source, source_file, when)
        ingest_event = self._create_ingest_event(ingest_id, source, source_file, when)

        # This can cause a race condition with a slow DB
        messages = create_recipes_messages(recipe_type.name, recipe_type.revision_num,
                                           convert_data_to_v6_json(recipe_data).get_dict(),
                                           event.id, ingest_event.id)
        CommandMessageManager().send_messages(messages)
    else:
        logger.info('No recipe type found for %s %s or recipe type is inactive',
                    recipe_name, recipe_revision)
def get_data(self):
    """Returns the data represented by this JSON

    :returns: The data
    :rtype: :class:`data.data.data.Data`
    """

    data = Data()

    for name, file_ids in self._data['files'].items():
        file_value = FileValue(name, file_ids)
        data.add_value(file_value)

    for name, json in self._data['json'].items():
        json_value = JsonValue(name, json)
        data.add_value(json_value)

    return data
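# Round-trip sketch for get_data() (assuming the DataV6/convert_data_to_v6_json
# helpers shown in the tests above are importable here): get_data() reverses the
# v6 serialization, rebuilding FileValue entries from the 'files' dict and
# JsonValue entries from the 'json' dict.
def _sketch_v6_round_trip():
    data = Data()
    data.add_value(FileValue('input_a', [1234]))
    data.add_value(JsonValue('input_b', 'hello'))
    json_dict = convert_data_to_v6_json(data).get_dict()
    restored = DataV6(data=json_dict, do_validate=True).get_data()
    assert set(restored.values.keys()) == {'input_a', 'input_b'}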
def test_add_value_from_output_data(self):
    """Tests calling Data.add_value_from_output_data()"""

    data = Data()
    output_data = Data()

    file_value = FileValue('output_1', [1, 2, 3])
    output_data.add_value(file_value)
    json_value = JsonValue('output_2', 'hello')
    output_data.add_value(json_value)

    data.add_value_from_output_data('input_1', 'output_1', output_data)
    self.assertSetEqual(set(data.values.keys()), {'input_1'})
    self.assertListEqual(data.values['input_1'].file_ids, [1, 2, 3])

    # Duplicate parameter
    with self.assertRaises(InvalidData) as context:
        data.add_value_from_output_data('input_1', 'output_1', output_data)
    self.assertEqual(context.exception.error.name, 'DUPLICATE_VALUE')
def test_execute_with_data(self):
    """Tests calling ProcessRecipeInput.execute() successfully when the recipe already
    has data populated
    """

    workspace = storage_test_utils.create_workspace()
    file_1 = storage_test_utils.create_file(workspace=workspace, file_size=10485760.0)
    file_2 = storage_test_utils.create_file(workspace=workspace, file_size=104857600.0)
    file_3 = storage_test_utils.create_file(workspace=workspace, file_size=987654321.0)
    recipe_interface = Interface()
    recipe_interface.add_parameter(FileParameter('input_a', ['text/plain']))
    recipe_interface.add_parameter(FileParameter('input_b', ['text/plain'], multiple=True))
    definition = RecipeDefinition(recipe_interface)
    definition_dict = convert_recipe_definition_to_v6_json(definition).get_dict()
    recipe_type = recipe_test_utils.create_recipe_type(definition=definition_dict)

    data = Data()
    data.add_value(FileValue('input_a', [file_1.id]))
    data.add_value(FileValue('input_b', [file_2.id, file_3.id]))
    data_dict = convert_data_to_v6_json(data).get_dict()
    recipe = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_dict)

    # Create message
    message = ProcessRecipeInput()
    message.recipe_id = recipe.id

    # Execute message
    result = message.execute()
    self.assertTrue(result)

    recipe = Recipe.objects.get(id=recipe.id)
    # Check for update_recipes message
    self.assertEqual(len(message.new_messages), 1)
    self.assertEqual(message.new_messages[0].type, 'update_recipes')

    # Check recipe for expected input_file_size
    self.assertEqual(recipe.input_file_size, 1052.0)

    # Make sure recipe input file models are created
    recipe_input_files = RecipeInputFile.objects.filter(recipe_id=recipe.id)
    self.assertEqual(len(recipe_input_files), 3)
    for recipe_input_file in recipe_input_files:
        if recipe_input_file.input_file_id == file_1.id:
            self.assertEqual(recipe_input_file.recipe_input, 'input_a')
        elif recipe_input_file.input_file_id == file_2.id:
            self.assertEqual(recipe_input_file.recipe_input, 'input_b')
        elif recipe_input_file.input_file_id == file_3.id:
            self.assertEqual(recipe_input_file.recipe_input, 'input_b')
        else:
            self.fail('Invalid input file ID: %s' % recipe_input_file.input_file_id)

    # Test executing message again
    message_json_dict = message.to_json()
    message = ProcessRecipeInput.from_json(message_json_dict)
    result = message.execute()
    self.assertTrue(result)

    # Still should have update_recipes message
    self.assertEqual(len(message.new_messages), 1)
    self.assertEqual(message.new_messages[0].type, 'update_recipes')

    # Make sure recipe input file models are unchanged
    recipe_input_files = RecipeInputFile.objects.filter(recipe_id=recipe.id)
    self.assertEqual(len(recipe_input_files), 3)
def test_execute_with_recipe_legacy(self):
    """Tests calling ProcessRecipeInput.execute() successfully when a legacy sub-recipe
    has to get its data from its recipe
    """

    workspace = storage_test_utils.create_workspace()
    file_1 = storage_test_utils.create_file(workspace=workspace, file_size=104857600.0)
    file_2 = storage_test_utils.create_file(workspace=workspace, file_size=987654321.0)
    file_3 = storage_test_utils.create_file(workspace=workspace, file_size=65456.0)
    file_4 = storage_test_utils.create_file(workspace=workspace, file_size=24564165456.0)

    manifest_a = {
        'seedVersion': '1.0.0',
        'job': {
            'name': 'job-a',
            'jobVersion': '1.0.0',
            'packageVersion': '1.0.0',
            'title': '',
            'description': '',
            'maintainer': {
                'name': 'John Doe',
                'email': '*****@*****.**'
            },
            'timeout': 10,
            'interface': {
                'command': '',
                'inputs': {
                    'files': [],
                    'json': []
                },
                'outputs': {
                    'files': [{
                        'name': 'output_a',
                        'pattern': '*.png'
                    }]
                }
            }
        }
    }
    job_type_a = job_test_utils.create_job_type(interface=manifest_a)
    output_data_a = Data()
    output_data_a.add_value(FileValue('output_a', [file_1.id]))
    output_data_a_dict = convert_data_to_v6_json(output_data_a).get_dict()

    manifest_b = {
        'seedVersion': '1.0.0',
        'job': {
            'name': 'job-b',
            'jobVersion': '1.0.0',
            'packageVersion': '1.0.0',
            'title': '',
            'description': '',
            'maintainer': {
                'name': 'John Doe',
                'email': '*****@*****.**'
            },
            'timeout': 10,
            'interface': {
                'command': '',
                'inputs': {
                    'files': [],
                    'json': []
                },
                'outputs': {
                    'files': [{
                        'name': 'output_b',
                        'pattern': '*.png',
                        'multiple': True
                    }]
                }
            }
        }
    }
    job_type_b = job_test_utils.create_job_type(interface=manifest_b)
    output_data_b = Data()
    output_data_b.add_value(FileValue('output_b', [file_2.id, file_3.id, file_4.id]))
    output_data_b_dict = convert_data_to_v6_json(output_data_b).get_dict()

    job_a = job_test_utils.create_job(job_type=job_type_a, num_exes=1, status='COMPLETED',
                                      output=output_data_a_dict)
    job_b = job_test_utils.create_job(job_type=job_type_b, num_exes=1, status='COMPLETED',
                                      output=output_data_b_dict)

    sub_recipe_interface_c = Interface()
    sub_recipe_interface_c.add_parameter(FileParameter('input_a', ['image/png']))
    sub_recipe_interface_c.add_parameter(FileParameter('input_b', ['image/png'], multiple=True))
    sub_recipe_def_c = RecipeDefinition(sub_recipe_interface_c)
    sub_recipe_def_dict_c = convert_recipe_definition_to_v1_json(sub_recipe_def_c).get_dict()
    sub_recipe_type_c = recipe_test_utils.create_recipe_type(definition=sub_recipe_def_dict_c)
    sub_recipe_c = recipe_test_utils.create_recipe(recipe_type=sub_recipe_type_c)

    definition = RecipeDefinition(Interface())
    definition.add_job_node('node_a', job_type_a.name, job_type_a.version, job_type_a.revision_num)
    definition.add_job_node('node_b', job_type_b.name, job_type_b.version, job_type_b.revision_num)
    definition.add_recipe_node('node_c', sub_recipe_type_c.name, sub_recipe_type_c.revision_num)
    definition.add_dependency('node_c', 'node_a')
    definition.add_dependency_input_connection('node_c', 'input_a', 'node_a', 'output_a')
    definition.add_dependency('node_c', 'node_b')
    definition.add_dependency_input_connection('node_c', 'input_b', 'node_b', 'output_b')
    def_dict = convert_recipe_definition_to_v6_json(definition).get_dict()
    recipe_type = recipe_test_utils.create_recipe_type(definition=def_dict)
    recipe_data_dict = {'version': '1.0', 'input_data': [], 'workspace_id': workspace.id}
    recipe = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=recipe_data_dict)
    recipe_node_a = recipe_test_utils.create_recipe_node(recipe=recipe, node_name='node_a', job=job_a)
    recipe_node_b = recipe_test_utils.create_recipe_node(recipe=recipe, node_name='node_b', job=job_b)
    recipe_node_c = recipe_test_utils.create_recipe_node(recipe=recipe, node_name='node_c',
                                                         sub_recipe=sub_recipe_c)
    RecipeNode.objects.bulk_create([recipe_node_a, recipe_node_b, recipe_node_c])
    job_a.recipe = recipe
    job_a.save()
    job_b.recipe = recipe
    job_b.save()
    sub_recipe_c.recipe = recipe
    sub_recipe_c.save()

    # Create message
    message = ProcessRecipeInput()
    message.recipe_id = sub_recipe_c.id

    # Execute message
    result = message.execute()
    self.assertTrue(result)

    sub_recipe_c = Recipe.objects.get(id=sub_recipe_c.id)
    # Check for update_recipes message
    self.assertEqual(len(message.new_messages), 1)
    self.assertEqual(message.new_messages[0].type, 'update_recipes')

    # Check sub-recipe for expected input_file_size
    self.assertEqual(sub_recipe_c.input_file_size, 24469.0)

    # Check sub-recipe for expected input data
    # Should be legacy input data with workspace ID
    self.assertEqual(sub_recipe_c.input['version'], '1.0')
    self.assertEqual(sub_recipe_c.input['workspace_id'], workspace.id)
    self.assertSetEqual(set(sub_recipe_c.get_input_data().values.keys()), {'input_a', 'input_b'})
    self.assertListEqual(sub_recipe_c.get_input_data().values['input_a'].file_ids, [file_1.id])
    self.assertListEqual(sub_recipe_c.get_input_data().values['input_b'].file_ids,
                         [file_2.id, file_3.id, file_4.id])

    # Make sure sub-recipe input file models are created
    input_files = RecipeInputFile.objects.filter(recipe_id=sub_recipe_c.id)
    self.assertEqual(len(input_files), 4)
    file_ids = {input_file.input_file_id for input_file in input_files}
    self.assertSetEqual(file_ids, {file_1.id, file_2.id, file_3.id, file_4.id})

    # Test executing message again
    message_json_dict = message.to_json()
    message = ProcessRecipeInput.from_json(message_json_dict)
    result = message.execute()
    self.assertTrue(result)

    # Still should have update_recipes message
    self.assertEqual(len(message.new_messages), 1)
    self.assertEqual(message.new_messages[0].type, 'update_recipes')

    # Make sure recipe input file models are unchanged
    input_files = RecipeInputFile.objects.filter(recipe_id=sub_recipe_c.id)
    self.assertEqual(len(input_files), 4)
def _handle_new_batch(self, batch, definition):
    """Handles creating a new batch of recipes with the defined dataset, returning any
    messages needed for the batch

    :param batch: The batch
    :type batch: :class:`batch.models.Batch`
    :param definition: The batch definition
    :type definition: :class:`batch.definition.definition.BatchDefinition`
    :return: The messages needed for the batch
    :rtype: list
    """

    messages = []

    dataset = DataSet.objects.get(pk=definition.dataset)
    recipe_type_rev = RecipeTypeRevision.objects.get_revision(
        name=batch.recipe_type.name, revision_num=batch.recipe_type_rev.revision_num)

    # Combine the parameters
    dataset_parameters = Batch.objects.merge_parameter_map(batch, dataset)
    try:
        recipe_type_rev.get_definition().input_interface.validate_connection(dataset_parameters)
    except InvalidInterfaceConnection:
        # No recipe inputs match the dataset
        logger.info('None of the dataset parameters matched the recipe type inputs; '
                    'no recipes will be created')
        self.is_prev_batch_done = True
        return messages

    # Get previous recipes for the dataset files
    ds_files = DataSetFile.objects.get_dataset_files(dataset.id).values_list('scale_file_id', flat=True)
    recipe_ids = RecipeInputFile.objects.filter(input_file_id__in=ds_files).values_list('recipe_id', flat=True)
    recipe_file_ids = RecipeInputFile.objects.filter(
        input_file_id__in=ds_files, recipe__recipe_type=batch.recipe_type,
        recipe__recipe_type_rev=batch.recipe_type_rev).values_list('input_file_id', flat=True)
    extra_files_qry = ScaleFile.objects.filter(id__in=ds_files)
    recipe_count = 0

    # Reprocess previous recipes
    if definition.supersedes:
        if len(recipe_ids) > 0:
            # Create re-process messages for all recipes
            recipe_qry = Recipe.objects.filter(id__in=recipe_ids).order_by('-id')
            if self.current_recipe_id:
                recipe_qry = recipe_qry.filter(id__lt=self.current_recipe_id)

            root_recipe_ids = []
            for recipe in recipe_qry.defer('input')[:MAX_RECIPE_NUM]:
                root_recipe_ids.append(recipe.id)
                self.current_recipe_id = recipe.id
            recipe_count = len(root_recipe_ids)

            if recipe_count > 0:
                logger.info('Found %d recipe(s) from previous batch to reprocess, creating messages',
                            recipe_count)
                msgs = create_reprocess_messages(root_recipe_ids, batch.recipe_type.name,
                                                 batch.recipe_type_rev.revision_num, batch.event_id,
                                                 batch_id=batch.id, forced_nodes=definition.forced_nodes)
                messages.extend(msgs)

        # Filter down the extra files to exclude those we've already re-processed
        extra_files_qry = extra_files_qry.exclude(id__in=recipe_file_ids)

    # If we have data that didn't match any previous recipes
    if self.current_dataset_file_id:
        extra_files_qry = extra_files_qry.filter(id__lt=self.current_dataset_file_id)
    extra_file_ids = list(extra_files_qry.order_by('-id').values_list(
        'id', flat=True)[:(MAX_RECIPE_NUM - recipe_count)])
    if extra_file_ids:
        self.current_dataset_file_id = extra_file_ids[-1]

    if len(extra_file_ids) > 0:
        logger.info('Found %d files that do not have previous recipes to re-process',
                    len(extra_file_ids))
        input_data = []
        for file in DataSetFile.objects.get_dataset_files(dataset.id).filter(
                scale_file__id__in=extra_file_ids):
            data = Data()
            parameter_name = file.parameter_name
            # If we needed to map the inputs to parameters
            if batch.get_configuration().input_map:
                for param in batch.get_configuration().input_map:
                    if param['datasetParameter'] == file.parameter_name:
                        parameter_name = param['input']
                        break
            data.add_value(FileValue(parameter_name, [file.scale_file_id]))
            input_data.append(convert_data_to_v6_json(data).get_dict())
        msgs = create_batch_recipes_messages(batch.recipe_type.name, batch.recipe_type.revision_num,
                                             input_data, batch.event_id, batch_id=batch.id)
        messages.extend(msgs)
        recipe_count += len(input_data)

    if recipe_count < MAX_RECIPE_NUM:
        # Handled fewer than the max number of recipes, so recipes from the previous batch must be done
        self.is_prev_batch_done = True

    return messages
def test_validate(self):
    """Tests calling FileValue.validate()"""

    file_param = FileParameter('input_1', ['application/json'])
    json_param = JsonParameter('input_1', 'string')
    file_value = FileValue('input_1', [1234, 1235])

    # Invalid parameter type
    with self.assertRaises(InvalidData) as context:
        file_value.validate(json_param)
    self.assertEqual(context.exception.error.name, 'MISMATCHED_PARAM_TYPE')

    # Zero files not accepted
    file_value = FileValue('input_1', [])
    with self.assertRaises(InvalidData) as context:
        file_value.validate(file_param)
    self.assertEqual(context.exception.error.name, 'NO_FILES')

    # Multiple files not accepted
    file_value = FileValue('input_1', [1234, 1235])
    with self.assertRaises(InvalidData) as context:
        file_value.validate(file_param)
    self.assertEqual(context.exception.error.name, 'MULTIPLE_FILES')

    # Valid data value
    file_value = FileValue('input_1', [1234])
    warnings = file_value.validate(file_param)
    self.assertListEqual(warnings, [])
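# A minimal sketch of the multiple-files case that the test above rejects:
# constructing the parameter with multiple=True (the keyword used elsewhere in
# these tests) should let the same two-file value validate cleanly, with
# validate() returning an empty list of warnings.
def _sketch_multiple_files_accepted():
    multi_param = FileParameter('input_1', ['application/json'], multiple=True)
    file_value = FileValue('input_1', [1234, 1235])
    assert file_value.validate(multi_param) == []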
def test_process_recipe_input(self):
    """Tests calling RecipeManager.process_recipe_input()"""

    date_1 = now()
    min_src_started_recipe_1 = date_1 - datetime.timedelta(days=200)
    max_src_ended_recipe_1 = date_1 + datetime.timedelta(days=200)
    date_2 = date_1 + datetime.timedelta(minutes=30)
    date_3 = date_1 + datetime.timedelta(minutes=40)
    date_4 = date_1 + datetime.timedelta(minutes=50)
    min_src_started_recipe_2 = date_1 - datetime.timedelta(days=500)
    max_src_ended_recipe_2 = date_1 + datetime.timedelta(days=500)
    s_class = 'A'
    s_sensor = '1'
    collection = '12345'
    task = 'abcd'
    workspace = storage_test_utils.create_workspace()
    file_1 = storage_test_utils.create_file(workspace=workspace, file_size=10485760.0,
                                            source_sensor_class=s_class, source_sensor=s_sensor,
                                            source_collection=collection, source_task=task)
    file_2 = storage_test_utils.create_file(workspace=workspace, file_size=104857600.0,
                                            source_started=date_2, source_ended=date_3,
                                            source_sensor_class=s_class, source_sensor=s_sensor,
                                            source_collection=collection, source_task=task)
    file_3 = storage_test_utils.create_file(workspace=workspace, file_size=987654321.0,
                                            source_started=min_src_started_recipe_1, source_ended=date_4)
    file_4 = storage_test_utils.create_file(workspace=workspace, file_size=46546.0,
                                            source_ended=max_src_ended_recipe_1)
    file_5 = storage_test_utils.create_file(workspace=workspace, file_size=83457.0, source_started=date_2)
    file_6 = storage_test_utils.create_file(workspace=workspace, file_size=42126588636633.0,
                                            source_ended=date_4)
    file_7 = storage_test_utils.create_file(workspace=workspace, file_size=76645464662354.0)
    file_8 = storage_test_utils.create_file(workspace=workspace, file_size=4654.0,
                                            source_started=min_src_started_recipe_2)
    file_9 = storage_test_utils.create_file(workspace=workspace, file_size=545.0, source_started=date_3,
                                            source_ended=max_src_ended_recipe_2)
    file_10 = storage_test_utils.create_file(workspace=workspace, file_size=0.154, source_ended=date_4,
                                             source_sensor_class=s_class, source_sensor=s_sensor,
                                             source_collection=collection, source_task=task)
    recipe_interface = Interface()
    recipe_interface.add_parameter(FileParameter('input_a', ['text/plain']))
    recipe_interface.add_parameter(FileParameter('input_b', ['text/plain'], multiple=True))
    definition = RecipeDefinition(recipe_interface)
    definition_dict = convert_recipe_definition_to_v6_json(definition).get_dict()
    recipe_type = recipe_test_utils.create_recipe_type_v6(definition=definition_dict)

    data_1 = Data()
    data_1.add_value(FileValue('input_a', [file_1.id]))
    data_1.add_value(FileValue('input_b', [file_2.id, file_3.id, file_4.id, file_5.id]))
    data_1_dict = convert_data_to_v6_json(data_1).get_dict()
    data_2 = Data()
    data_2.add_value(FileValue('input_a', [file_6.id]))
    data_2.add_value(FileValue('input_b', [file_7.id, file_8.id, file_9.id, file_10.id]))
    data_2_dict = convert_data_to_v6_json(data_2).get_dict()
    data_3 = Data()
    data_3_dict = convert_data_to_v6_json(data_3).get_dict()

    recipe_1 = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_1_dict)
    recipe_2 = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_2_dict)
    recipe_3 = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_3_dict)

    # Execute method
    Recipe.objects.process_recipe_input(recipe_1)
    Recipe.objects.process_recipe_input(recipe_2)
    Recipe.objects.process_recipe_input(recipe_3)

    # Retrieve updated recipe models
    recipes = Recipe.objects.filter(id__in=[recipe_1.id, recipe_2.id, recipe_3.id]).order_by('id')
    recipe_1 = recipes[0]
    recipe_2 = recipes[1]
    recipe_3 = recipes[2]

    # Check recipes for expected fields
    self.assertEqual(recipe_1.input_file_size, 1053.0)
    self.assertEqual(recipe_1.source_started, min_src_started_recipe_1)
    self.assertEqual(recipe_1.source_ended, max_src_ended_recipe_1)
    self.assertEqual(recipe_1.source_sensor_class, s_class)
    self.assertEqual(recipe_1.source_sensor, s_sensor)
    self.assertEqual(recipe_1.source_collection, collection)
    self.assertEqual(recipe_1.source_task, task)
    self.assertEqual(recipe_2.input_file_size, 113269857.0)
    self.assertEqual(recipe_2.source_started, min_src_started_recipe_2)
    self.assertEqual(recipe_2.source_ended, max_src_ended_recipe_2)
    self.assertEqual(recipe_2.source_sensor_class, s_class)
    self.assertEqual(recipe_2.source_sensor, s_sensor)
    self.assertEqual(recipe_2.source_collection, collection)
    self.assertEqual(recipe_2.source_task, task)
    self.assertEqual(recipe_3.input_file_size, 0.0)
    self.assertIsNone(recipe_3.source_started)
    self.assertIsNone(recipe_3.source_ended)

    # Make sure recipe input file models are created
    recipe_input_files = RecipeInputFile.objects.filter(recipe_id=recipe_1.id)
    self.assertEqual(len(recipe_input_files), 5)
    input_files_dict = {'input_a': set(), 'input_b': set()}
    for recipe_input_file in recipe_input_files:
        input_files_dict[recipe_input_file.recipe_input].add(recipe_input_file.input_file_id)
    self.assertDictEqual(input_files_dict,
                         {'input_a': {file_1.id},
                          'input_b': {file_2.id, file_3.id, file_4.id, file_5.id}})

    recipe_input_files = RecipeInputFile.objects.filter(recipe_id=recipe_2.id)
    self.assertEqual(len(recipe_input_files), 5)
    input_files_dict = {'input_a': set(), 'input_b': set()}
    for recipe_input_file in recipe_input_files:
        input_files_dict[recipe_input_file.recipe_input].add(recipe_input_file.input_file_id)
    self.assertDictEqual(input_files_dict,
                         {'input_a': {file_6.id},
                          'input_b': {file_7.id, file_8.id, file_9.id, file_10.id}})

    self.assertEqual(RecipeInputFile.objects.filter(recipe_id=recipe_3.id).count(), 0)