Example #1
    def process_manual_ingested_source_file(self, ingest_id, source_file, when,
                                            recipe_type_id):
        """Processes a manual ingest where a strike or scan is not involved. All database
        changes are made in an atomic transaction

        :param ingest_id:
        :type ingest_id: int

        :param source_file: The source file that was ingested
        :type source_file: :class:`source.models.SourceFile`
        :param when: When the source file was ingested
        :type when: :class:`datetime.datetime`
        :param recipe_type_id: id of the Recipe type to kick off
        :type recipe_type_id: int
        """

        # filter().first() yields None for a missing recipe type instead of
        # raising DoesNotExist, so the check below can log and fall through
        recipe_type = RecipeType.objects.filter(id=recipe_type_id).first()

        if recipe_type and recipe_type.is_active:
            recipe_data = Data()
            input_name = recipe_type.get_definition().get_input_keys()[0]
            recipe_data.add_value(FileValue(input_name, [source_file.id]))
            event = self._create_trigger_event(None, source_file, when)
            ingest_event = self._create_ingest_event(ingest_id, None,
                                                     source_file, when)

            logger.info('Queueing new recipe of type %s %s', recipe_type.name,
                        recipe_type.revision_num)
            Queue.objects.queue_new_recipe_v6(recipe_type, recipe_data, event,
                                              ingest_event)

        else:
            logger.info(
                'No recipe type found for id %s or recipe type is inactive',
                recipe_type_id)
Example #2
    def test_validate(self):
        """Tests calling Data.validate()"""

        interface = Interface()
        data = Data()

        interface.add_parameter(FileParameter('input_1', ['application/json']))
        interface.add_parameter(JsonParameter('input_2', 'integer'))
        data.add_value(FileValue('input_1', [123]))
        data.add_value(JsonValue('input_2', 100))
        data.add_value(JsonValue('extra_input_1', 'hello'))
        data.add_value(JsonValue('extra_input_2', 'there'))

        # Valid data
        data.validate(interface)
        # Ensure extra data values are removed
        self.assertSetEqual(set(data.values.keys()), {'input_1', 'input_2'})

        # Data is missing required input 3
        interface.add_parameter(FileParameter('input_3', ['image/gif'], required=True))
        with self.assertRaises(InvalidData) as context:
            data.validate(interface)
        self.assertEqual(context.exception.error.name, 'PARAM_REQUIRED')

        data.add_value(FileValue('input_3', [999]))  # Input 3 taken care of now

        # Invalid data
        interface.add_parameter(JsonParameter('input_4', 'string'))
        mock_value = MagicMock()
        mock_value.name = 'input_4'
        mock_value.validate.side_effect = InvalidData('MOCK', '')
        data.add_value(mock_value)
        with self.assertRaises(InvalidData) as context:
            data.validate(interface)
        self.assertEqual(context.exception.error.name, 'MOCK')
Example #3
    def process_event(self, event, last_event=None):
        """See :meth:`job.clock.ClockEventProcessor.process_event`.

        Compares the new event with the last event and schedules jobs for any missing metrics days.
        """

        # Attempt to get the daily metrics job type; filter().last() returns
        # None rather than raising DoesNotExist, so check the result directly
        job_type = JobType.objects.filter(name='scale-daily-metrics').last()
        if not job_type:
            raise ClockEventError(
                'Missing required job type: scale-daily-metrics')

        if last_event:
            # Build a list of days that require metrics
            day_count = range(
                (event.occurred.date() - last_event.occurred.date()).days)
            days = [
                last_event.occurred.date() + datetime.timedelta(days=d)
                for d in day_count
            ]
        else:
            # Use the previous day when first triggered
            days = [timezone.now().date() - datetime.timedelta(days=1)]

        # Schedule one job for each required day
        for day in days:
            job_data = Data()
            job_data.add_value(JsonValue('DAY', day.strftime('%Y-%m-%d')))
            job = Queue.objects.queue_new_job_v6(job_type, job_data, event)
            CommandMessageManager().send_messages(
                create_process_job_input_messages([job.id]))
Example #4
File: test_filter.py Project: sau29/scale
    def test_is_data_accepted(self):
        """Tests calling DataFilter.is_data_accepted()"""

        data_filter = DataFilter(all=False)
        data_filter.add_filter({'name': 'input_a', 'type': 'media-type', 'condition': '==', 'values': ['application/json']})
        data_filter.add_filter({'name': 'input_b', 'type': 'string', 'condition': 'contains', 'values': ['abcde']})
        data_filter.add_filter({'name': 'input_c', 'type': 'integer', 'condition': '>', 'values': ['0']})
        data_filter.add_filter({'name': 'input_d', 'type': 'integer', 'condition': 'between', 'values': ['0', '100']})
        data_filter.add_filter({'name': 'input_f', 'type': 'meta-data', 'condition': 'in', 'values': [['foo', 'baz']],
                                'fields': [['a', 'b']]})
        
        data = Data()

        file_value = FileValue('input_a', [self.file1.id])
        data.add_value(file_value)
        
        # first filter passes, so data is accepted if all is set to false
        self.assertTrue(data_filter.is_data_accepted(data))
        data_filter.all = True
        # other filters fail so data is not accepted
        self.assertFalse(data_filter.is_data_accepted(data))
        
        # get other filters to pass
        json_value = JsonValue('input_b', 'abcdefg')
        data.add_value(json_value)
        json_value = JsonValue('input_c', '10')
        data.add_value(json_value)
        json_value = JsonValue('input_d', 50)
        data.add_value(json_value)
        file_value = FileValue('input_f', [self.file2.id])
        data.add_value(file_value)

        self.assertTrue(data_filter.is_data_accepted(data))
Example #5
    def test_convert_data_to_v1_json(self):
        """Tests calling convert_data_to_v1_json()"""

        # Try interface with nothing set
        data = Data()
        interface = Interface()
        json = convert_data_to_v1_json(data, interface)
        DataV1(data=json.get_dict())  # Revalidate

        # Try data with a variety of values
        data = Data()
        data.add_value(FileValue('input_a', [1234]))
        data.add_value(FileValue('input_b', [1235, 1236]))
        data.add_value(JsonValue('input_c', 'hello'))
        data.add_value(JsonValue('input_d', 11.9))
        json = convert_data_to_v1_json(data, interface)
        self.assertDictEqual(
            json.get_dict(), {
                u'input_data': [{
                    u'name': u'input_d',
                    u'value': 11.9
                }, {
                    u'name': u'input_b',
                    u'file_ids': [1235, 1236]
                }, {
                    u'name': u'input_c',
                    u'value': u'hello'
                }, {
                    u'name': u'input_a',
                    u'file_id': 1234
                }],
                u'version': u'1.0'
            })
        DataV1(data=json.get_dict())  # Revalidate
        self.assertSetEqual(
            set(DataV6(json.get_dict()).get_data().values.keys()),
            {'input_a', 'input_b', 'input_c', 'input_d'})

        # Try data with a single file list that should be a directory
        data = Data()
        data.add_value(FileValue('input_a', [1234]))
        interface = Interface()
        file_param = FileParameter('input_a', [], True, True)
        interface.add_parameter(file_param)
        json = convert_data_to_v1_json(data, interface)

        self.assertDictEqual(
            json.get_dict(), {
                u'input_data': [{
                    u'name': u'input_a',
                    u'file_ids': [1234]
                }],
                u'version': u'1.0'
            })
Example #6
    def process_ingested_source_file_cm(self, ingest_id, source, source_file,
                                        when):
        """Processes the given ingested source file by kicking off its recipe.
        All database changes are made in an atomic transaction.

        :param source: The strike that triggered the ingest
        :type scan: `object`

        :param source_file: The source file that was ingested
        :type source_file: :class:`source.models.SourceFile`
        :param when: When the source file was ingested
        :type when: :class:`datetime.datetime`
        """

        # Create the recipe handler associated with the ingest strike/scan
        source_recipe_config = source.configuration['recipe']
        recipe_name = source_recipe_config['name']
        recipe_revision = source_recipe_config.get('revision_num')

        # filter().first() yields None for a missing recipe type instead of
        # raising DoesNotExist, so the checks below can log and fall through
        recipe_type = RecipeType.objects.filter(name=recipe_name).first()
        if recipe_revision:
            recipe_type = RecipeTypeRevision.objects.get_revision(
                recipe_name, recipe_revision).recipe_type

        # Validate the recipe type before touching its definition
        if not recipe_type or not recipe_type.is_active:
            logger.info(
                'No recipe type found for %s %s or recipe type is inactive',
                recipe_name, recipe_revision)
            return

        if len(recipe_type.get_definition().get_input_keys()) == 0:
            logger.info(
                'No inputs defined for recipe %s. Recipe will not be run.',
                recipe_name)
            return

        # Assume one input per recipe and use the first defined input
        recipe_data = Data()
        input_name = recipe_type.get_definition().get_input_keys()[0]
        recipe_data.add_value(FileValue(input_name, [source_file.id]))
        event = self._create_trigger_event(source, source_file, when)
        ingest_event = self._create_ingest_event(ingest_id, source,
                                                 source_file, when)

        # This can cause a race condition with a slow DB.
        messages = create_recipes_messages(
            recipe_type.name, recipe_type.revision_num,
            convert_data_to_v6_json(recipe_data).get_dict(), event.id,
            ingest_event.id)
        CommandMessageManager().send_messages(messages)
Example #7
    def test_json(self):
        """Tests converting a ProcessCondition message to and from JSON"""

        cond_interface_1 = Interface()
        cond_interface_1.add_parameter(JsonParameter('cond_int', 'integer'))
        df1 = DataFilter(filter_list=[{
            'name': 'cond_int',
            'type': 'integer',
            'condition': '==',
            'values': [0]
        }])
        definition = RecipeDefinition(cond_interface_1)
        definition.add_condition_node('node_a', cond_interface_1, df1)
        definition.add_recipe_input_connection('node_a', 'cond_int',
                                               'cond_int')

        definition_dict = convert_recipe_definition_to_v6_json(
            definition).get_dict()
        recipe_type = recipe_test_utils.create_recipe_type_v6(
            definition=definition_dict)

        data_1 = Data()
        data_1.add_value(JsonValue('cond_int', 0))
        data_1_dict = convert_data_to_v6_json(data_1).get_dict()
        recipe = recipe_test_utils.create_recipe(recipe_type=recipe_type,
                                                 input=data_1_dict)
        condition = recipe_test_utils.create_recipe_condition(recipe=recipe,
                                                              save=True)
        recipe_test_utils.create_recipe_node(recipe=recipe,
                                             node_name='node_a',
                                             condition=condition,
                                             save=True)

        # Create message
        message = create_process_condition_messages([condition.id])[0]

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = ProcessCondition.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        condition = RecipeCondition.objects.get(id=condition.id)
        self.assertEqual(len(new_message.new_messages), 1)
        self.assertEqual(new_message.new_messages[0].type, 'update_recipe')
        self.assertEqual(new_message.new_messages[0].root_recipe_id, recipe.id)
        self.assertTrue(condition.is_processed)
        self.assertIsNotNone(condition.processed)
        self.assertTrue(condition.is_accepted)
Example #8
    def test_generate_node_input_data(self):
        """Tests calling RecipeDefinition.generate_node_input_data()"""

        input_interface = Interface()
        input_interface.add_parameter(
            FileParameter('recipe_input_1', ['image/gif'], multiple=True))
        input_interface.add_parameter(JsonParameter('recipe_input_2',
                                                    'string'))
        definition = RecipeDefinition(input_interface)
        definition.add_job_node('node_a', 'job_type_1', '1.0', 1)
        definition.add_job_node('node_b', 'job_type_2', '1.0', 1)
        definition.add_job_node('node_c', 'job_type_3', '1.0', 1)
        definition.add_dependency('node_c', 'node_b')
        definition.add_dependency('node_c', 'node_a')
        definition.add_recipe_input_connection('node_c', 'input_1',
                                               'recipe_input_1')
        definition.add_recipe_input_connection('node_c', 'input_2',
                                               'recipe_input_2')
        definition.add_dependency_input_connection('node_c', 'input_3',
                                                   'node_a', 'output_a_1')
        definition.add_dependency_input_connection('node_c', 'input_4',
                                                   'node_a', 'output_a_2')
        definition.add_dependency_input_connection('node_c', 'input_5',
                                                   'node_b', 'output_b_1')

        recipe_data = Data()
        recipe_data.add_value(FileValue('recipe_input_1', [1, 2, 3, 4, 5]))
        recipe_data.add_value(JsonValue('recipe_input_2', 'Scale is awesome!'))
        a_output_data = Data()
        a_output_data.add_value(FileValue('output_a_1', [1234]))
        a_output_data.add_value(JsonValue('output_a_2', {'foo': 'bar'}))
        b_output_data = Data()
        b_output_data.add_value(JsonValue('output_b_1', 12.34))
        node_outputs = {
            'node_a': RecipeNodeOutput('node_a', 'job', 1, a_output_data),
            'node_b': RecipeNodeOutput('node_b', 'job', 1, b_output_data)
        }

        node_data = definition.generate_node_input_data(
            'node_c', recipe_data, node_outputs)
        self.assertSetEqual(
            set(node_data.values.keys()),
            {'input_1', 'input_2', 'input_3', 'input_4', 'input_5'})
        self.assertListEqual(node_data.values['input_1'].file_ids,
                             [1, 2, 3, 4, 5])
        self.assertEqual(node_data.values['input_2'].value,
                         'Scale is awesome!')
        self.assertListEqual(node_data.values['input_3'].file_ids, [1234])
        self.assertDictEqual(node_data.values['input_4'].value, {'foo': 'bar'})
        self.assertEqual(node_data.values['input_5'].value, 12.34)
Example #9
    def get_data(self):
        """Returns the data represented by this JSON

        :returns: The data
        :rtype: :class:`data.data.data.Data`:
        """

        data = Data()

        for name, file_ids in self._data['files'].items():
            file_value = FileValue(name, file_ids)
            data.add_value(file_value)
        for name, json in self._data['json'].items():
            json_value = JsonValue(name, json)
            data.add_value(json_value)

        return data
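
For reference, here is a minimal sketch (not taken from the project) of the v6 data dict that get_data() walks: 'files' maps each input name to a list of file IDs and 'json' maps each input name to a plain JSON value. The input names are hypothetical, and the '6' version string is an assumption based on the DataV6 wrapper seen in the other examples.

    # Hedged sketch: layout inferred from get_data() above, names made up
    data_dict = {
        'version': '6',                    # assumed v6 version string
        'files': {'input_a': [1234]},      # input name -> list of file IDs
        'json': {'input_b': 'hello'},      # input name -> JSON value
    }
    data = DataV6(data=data_dict, do_validate=True).get_data()
    assert set(data.values.keys()) == {'input_a', 'input_b'}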
Example #10
    def test_add_value_from_output_data(self):
        """Tests calling Data.add_value_from_output_data()"""

        data = Data()
        output_data = Data()

        file_value = FileValue('output_1', [1, 2, 3])
        output_data.add_value(file_value)
        json_value = JsonValue('output_2', 'hello')
        output_data.add_value(json_value)

        data.add_value_from_output_data('input_1', 'output_1', output_data)
        self.assertSetEqual(set(data.values.keys()), {'input_1'})
        self.assertListEqual(data.values['input_1'].file_ids, [1, 2, 3])

        # Duplicate parameter
        with self.assertRaises(InvalidData) as context:
            data.add_value_from_output_data('input_1', 'output_1', output_data)
        self.assertEqual(context.exception.error.name, 'DUPLICATE_VALUE')
Example #11
    def test_convert_data_to_v6_json(self):
        """Tests calling convert_data_to_v6_json()"""

        # Try interface with nothing set
        data = Data()
        json = convert_data_to_v6_json(data)
        DataV6(data=json.get_dict(), do_validate=True)  # Revalidate

        # Try data with a variety of values
        data = Data()
        data.add_value(FileValue('input_a', [1234]))
        data.add_value(FileValue('input_b', [1235, 1236]))
        data.add_value(JsonValue('input_c', 'hello'))
        data.add_value(JsonValue('input_d', 11.9))
        json = convert_data_to_v6_json(data)
        DataV6(data=json.get_dict(), do_validate=True)  # Revalidate
        self.assertSetEqual(set(json.get_data().values.keys()), {'input_a', 'input_b', 'input_c', 'input_d'})
Example #12
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`
        """
        from ingest.models import Ingest
        ingest_job_type = Ingest.objects.get_ingest_job_type()
        
        # Grab the ingest object
        ingest = Ingest.objects.get(pk=self.ingest_id)
        
        when = ingest.transfer_ended if ingest.transfer_ended else now()
        desc = {'file_name': ingest.file_name}

        event = None
        ingest_id = ingest.id
        with transaction.atomic():
            # Create the appropriate trigger event
            if self.create_ingest_type == STRIKE_JOB_TYPE:
                desc['strike_id'] = self.strike_id
                event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, desc, when)
            elif self.create_ingest_type == SCAN_JOB_TYPE:
                ingest_id = Ingest.objects.get(scan_id=self.scan_id, file_name=ingest.file_name).id
                desc['scan_id'] = self.scan_id
                event = TriggerEvent.objects.create_trigger_event('SCAN_TRANSFER', None, desc, when)
            
        data = Data()
        data.add_value(JsonValue('ingest_id', ingest_id))
        data.add_value(JsonValue('workspace', ingest.workspace.name))
        if ingest.new_workspace:
            data.add_value(JsonValue('new_workspace', ingest.new_workspace.name))

        ingest_job = None
        with transaction.atomic():
            ingest_job = Queue.objects.queue_new_job_v6(ingest_job_type, data, event)
            ingest.job = ingest_job
            ingest.status = 'QUEUED'
            ingest.save()
            
        # Send message to start processing job input (done outside the transaction so the job is committed before the message is handled)
        # This can cause a race condition with a slow DB.
        job = Job.objects.get_details(ingest_job.id)
        self.new_messages.extend(create_process_job_input_messages([job.id]))
        
        return True
Example #13
    def test_add_value(self):
        """Tests calling Data.add_value()"""

        data = Data()

        file_value = FileValue('input_1', [123])
        data.add_value(file_value)

        json_value = JsonValue('input_2', {'foo': 'bar'})
        data.add_value(json_value)

        self.assertSetEqual(set(data.values.keys()), {'input_1', 'input_2'})

        # Duplicate value
        dup_value = FileValue('input_1', [123])
        with self.assertRaises(InvalidData) as context:
            data.add_value(dup_value)
        self.assertEqual(context.exception.error.name, 'DUPLICATE_VALUE')
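
The duplicate check exercised above suggests roughly the following shape for Data.add_value(). This is a hedged sketch inferred from the tests, not the project's actual implementation; the two-argument InvalidData signature is assumed from its use in Example #2.

    # Hedged sketch inferred from the tests; not the project's real code
    class Data(object):
        def __init__(self):
            self.values = {}  # parameter name -> DataValue

        def add_value(self, value):
            """Adds a value, rejecting duplicate parameter names"""
            if value.name in self.values:
                raise InvalidData('DUPLICATE_VALUE',
                                  'Duplicate data value %s' % value.name)
            self.values[value.name] = value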
Example #14
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`
        """
        from ingest.models import Ingest
        ingest_job_type = Ingest.objects.get_ingest_job_type()

        # Grab the ingest object
        ingest = Ingest.objects.get(pk=self.ingest_id)

        when = ingest.transfer_ended if ingest.transfer_ended else now()
        desc = {'file_name': ingest.file_name}

        event = None
        ingest_id = ingest.id
        with transaction.atomic():
            # Create the appropriate trigger event
            if self.create_ingest_type == STRIKE_JOB_TYPE:
                desc['strike_id'] = self.strike_id
                event = TriggerEvent.objects.create_trigger_event(
                    'STRIKE_TRANSFER', None, desc, when)
            elif self.create_ingest_type == SCAN_JOB_TYPE:
                ingest_id = Ingest.objects.get(scan_id=self.scan_id,
                                               file_name=ingest.file_name).id
                desc['scan_id'] = self.scan_id
                event = TriggerEvent.objects.create_trigger_event(
                    'SCAN_TRANSFER', None, desc, when)

        data = Data()
        data.add_value(JsonValue('ingest_id', ingest_id))
        data.add_value(JsonValue('workspace', ingest.workspace.name))
        if ingest.new_workspace:
            data.add_value(
                JsonValue('new_workspace', ingest.new_workspace.name))

        ingest_job = None
        with transaction.atomic():
            ingest_job = Queue.objects.queue_new_job_v6(
                ingest_job_type, data, event)
            ingest.job = ingest_job
            ingest.status = 'QUEUED'
            ingest.save()

        return True
Example #15
    def test_execute_with_data(self):
        """Tests calling ProcessRecipeInput.execute() successfully when the recipe already has data populated"""

        workspace = storage_test_utils.create_workspace()
        file_1 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=10485760.0)
        file_2 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=104857600.0)
        file_3 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=987654321.0)
        recipe_interface = Interface()
        recipe_interface.add_parameter(FileParameter('input_a',
                                                     ['text/plain']))
        recipe_interface.add_parameter(
            FileParameter('input_b', ['text/plain'], multiple=True))
        definition = RecipeDefinition(recipe_interface)
        definition_dict = convert_recipe_definition_to_v6_json(
            definition).get_dict()
        recipe_type = recipe_test_utils.create_recipe_type(
            definition=definition_dict)

        data = Data()
        data.add_value(FileValue('input_a', [file_1.id]))
        data.add_value(FileValue('input_b', [file_2.id, file_3.id]))
        data_dict = convert_data_to_v6_json(data).get_dict()
        recipe = recipe_test_utils.create_recipe(recipe_type=recipe_type,
                                                 input=data_dict)

        # Create message
        message = ProcessRecipeInput()
        message.recipe_id = recipe.id

        # Execute message
        result = message.execute()
        self.assertTrue(result)

        recipe = Recipe.objects.get(id=recipe.id)
        # Check for update_recipes message
        self.assertEqual(len(message.new_messages), 1)
        self.assertEqual(message.new_messages[0].type, 'update_recipes')

        # Check recipe for expected input_file_size
        self.assertEqual(recipe.input_file_size, 1052.0)

        # Make sure recipe input file models are created
        recipe_input_files = RecipeInputFile.objects.filter(
            recipe_id=recipe.id)
        self.assertEqual(len(recipe_input_files), 3)
        for recipe_input_file in recipe_input_files:
            if recipe_input_file.input_file_id == file_1.id:
                self.assertEqual(recipe_input_file.recipe_input, 'input_a')
            elif recipe_input_file.input_file_id == file_2.id:
                self.assertEqual(recipe_input_file.recipe_input, 'input_b')
            elif recipe_input_file.input_file_id == file_3.id:
                self.assertEqual(recipe_input_file.recipe_input, 'input_b')
            else:
                self.fail('Invalid input file ID: %s' %
                          recipe_input_file.input_file_id)

        # Test executing message again
        message_json_dict = message.to_json()
        message = ProcessRecipeInput.from_json(message_json_dict)
        result = message.execute()
        self.assertTrue(result)

        # Still should have update_recipes message
        self.assertEqual(len(message.new_messages), 1)
        self.assertEqual(message.new_messages[0].type, 'update_recipes')

        # Make sure recipe input file models are unchanged
        recipe_input_files = RecipeInputFile.objects.filter(
            recipe_id=recipe.id)
        self.assertEqual(len(recipe_input_files), 3)
Example #16
    def test_execute_with_recipe_legacy(self):
        """Tests calling ProcessRecipeInput.execute() successfully when a legacy sub-recipe has to get its data from its
        recipe
        """

        workspace = storage_test_utils.create_workspace()
        file_1 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=104857600.0)
        file_2 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=987654321.0)
        file_3 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=65456.0)
        file_4 = storage_test_utils.create_file(workspace=workspace,
                                                file_size=24564165456.0)
        manifest_a = {
            'seedVersion': '1.0.0',
            'job': {
                'name': 'job-a',
                'jobVersion': '1.0.0',
                'packageVersion': '1.0.0',
                'title': '',
                'description': '',
                'maintainer': {
                    'name': 'John Doe',
                    'email': '*****@*****.**'
                },
                'timeout': 10,
                'interface': {
                    'command': '',
                    'inputs': {
                        'files': [],
                        'json': []
                    },
                    'outputs': {
                        'files': [{
                            'name': 'output_a',
                            'pattern': '*.png'
                        }]
                    }
                }
            }
        }
        job_type_a = job_test_utils.create_job_type(interface=manifest_a)
        output_data_a = Data()
        output_data_a.add_value(FileValue('output_a', [file_1.id]))
        output_data_a_dict = convert_data_to_v6_json(output_data_a).get_dict()
        manifest_b = {
            'seedVersion': '1.0.0',
            'job': {
                'name': 'job-b',
                'jobVersion': '1.0.0',
                'packageVersion': '1.0.0',
                'title': '',
                'description': '',
                'maintainer': {
                    'name': 'John Doe',
                    'email': '*****@*****.**'
                },
                'timeout': 10,
                'interface': {
                    'command': '',
                    'inputs': {
                        'files': [],
                        'json': []
                    },
                    'outputs': {
                        'files': [{
                            'name': 'output_b',
                            'pattern': '*.png',
                            'multiple': True
                        }]
                    }
                }
            }
        }
        job_type_b = job_test_utils.create_job_type(interface=manifest_b)
        output_data_b = Data()
        output_data_b.add_value(
            FileValue('output_b', [file_2.id, file_3.id, file_4.id]))
        output_data_b_dict = convert_data_to_v6_json(output_data_b).get_dict()
        job_a = job_test_utils.create_job(job_type=job_type_a,
                                          num_exes=1,
                                          status='COMPLETED',
                                          output=output_data_a_dict)
        job_b = job_test_utils.create_job(job_type=job_type_b,
                                          num_exes=1,
                                          status='COMPLETED',
                                          output=output_data_b_dict)
        sub_recipe_interface_c = Interface()
        sub_recipe_interface_c.add_parameter(
            FileParameter('input_a', ['image/png']))
        sub_recipe_interface_c.add_parameter(
            FileParameter('input_b', ['image/png'], multiple=True))
        sub_recipe_def_c = RecipeDefinition(sub_recipe_interface_c)
        sub_recipe_def_dict_c = convert_recipe_definition_to_v1_json(
            sub_recipe_def_c).get_dict()
        sub_recipe_type_c = recipe_test_utils.create_recipe_type(
            definition=sub_recipe_def_dict_c)
        sub_recipe_c = recipe_test_utils.create_recipe(
            recipe_type=sub_recipe_type_c)

        definition = RecipeDefinition(Interface())
        definition.add_job_node('node_a', job_type_a.name, job_type_a.version,
                                job_type_a.revision_num)
        definition.add_job_node('node_b', job_type_b.name, job_type_b.version,
                                job_type_b.revision_num)
        definition.add_recipe_node('node_c', sub_recipe_type_c.name,
                                   sub_recipe_type_c.revision_num)
        definition.add_dependency('node_c', 'node_a')
        definition.add_dependency_input_connection('node_c', 'input_a',
                                                   'node_a', 'output_a')
        definition.add_dependency('node_c', 'node_b')
        definition.add_dependency_input_connection('node_c', 'input_b',
                                                   'node_b', 'output_b')
        def_dict = convert_recipe_definition_to_v6_json(definition).get_dict()
        recipe_type = recipe_test_utils.create_recipe_type(definition=def_dict)
        recipe_data_dict = {
            'version': '1.0',
            'input_data': [],
            'workspace_id': workspace.id
        }
        recipe = recipe_test_utils.create_recipe(recipe_type=recipe_type,
                                                 input=recipe_data_dict)
        recipe_node_a = recipe_test_utils.create_recipe_node(
            recipe=recipe, node_name='node_a', job=job_a)
        recipe_node_b = recipe_test_utils.create_recipe_node(
            recipe=recipe, node_name='node_b', job=job_b)
        recipe_node_c = recipe_test_utils.create_recipe_node(
            recipe=recipe, node_name='node_c', sub_recipe=sub_recipe_c)
        RecipeNode.objects.bulk_create(
            [recipe_node_a, recipe_node_b, recipe_node_c])
        job_a.recipe = recipe
        job_a.save()
        job_b.recipe = recipe
        job_b.save()
        sub_recipe_c.recipe = recipe
        sub_recipe_c.save()

        # Create message
        message = ProcessRecipeInput()
        message.recipe_id = sub_recipe_c.id

        # Execute message
        result = message.execute()
        self.assertTrue(result)

        sub_recipe_c = Recipe.objects.get(id=sub_recipe_c.id)
        # Check for update_recipes message
        self.assertEqual(len(message.new_messages), 1)
        self.assertEqual(message.new_messages[0].type, 'update_recipes')

        # Check sub-recipe for expected input_file_size
        self.assertEqual(sub_recipe_c.input_file_size, 24469.0)
        # Check sub-recipe for expected input data
        self.assertEqual(
            sub_recipe_c.input['version'],
            '1.0')  # Should be legacy input data with workspace ID
        self.assertEqual(sub_recipe_c.input['workspace_id'], workspace.id)
        self.assertSetEqual(set(sub_recipe_c.get_input_data().values.keys()),
                            {'input_a', 'input_b'})
        self.assertListEqual(
            sub_recipe_c.get_input_data().values['input_a'].file_ids,
            [file_1.id])
        self.assertListEqual(
            sub_recipe_c.get_input_data().values['input_b'].file_ids,
            [file_2.id, file_3.id, file_4.id])

        # Make sure sub-recipe input file models are created
        input_files = RecipeInputFile.objects.filter(recipe_id=sub_recipe_c.id)
        self.assertEqual(len(input_files), 4)
        file_ids = {input_file.input_file_id for input_file in input_files}
        self.assertSetEqual(file_ids,
                            {file_1.id, file_2.id, file_3.id, file_4.id})

        # Test executing message again
        message_json_dict = message.to_json()
        message = ProcessRecipeInput.from_json(message_json_dict)
        result = message.execute()
        self.assertTrue(result)

        # Still should have update_recipes message
        self.assertEqual(len(message.new_messages), 1)
        self.assertEqual(message.new_messages[0].type, 'update_recipes')

        # Make sure recipe input file models are unchanged
        input_files = RecipeInputFile.objects.filter(recipe_id=sub_recipe_c.id)
        self.assertEqual(len(input_files), 4)
Example #17
    def _handle_new_batch(self, batch, definition):
        """Handles creating a new batch of recipes with the defined dataset, returning any messages needed for the batch
        
        :param batch: The batch
        :type batch: :class:`batch.models.Batch`
        :param definition: The batch definition
        :type definition: :class:`batch.definition.definition.BatchDefinition`
        :return: The messages needed for the re-processing
        :rtype: list
        """

        messages = []
        dataset = DataSet.objects.get(pk=definition.dataset)
        recipe_type_rev = RecipeTypeRevision.objects.get_revision(
            name=batch.recipe_type.name,
            revision_num=batch.recipe_type_rev.revision_num)

        # combine the parameters
        dataset_parameters = Batch.objects.merge_parameter_map(batch, dataset)

        try:
            recipe_type_rev.get_definition().input_interface.validate_connection(
                dataset_parameters)
        except InvalidInterfaceConnection:
            # No recipe inputs match the dataset
            logger.info(
                'None of the dataset parameters matched the recipe type inputs; No recipes will be created'
            )
            self.is_prev_batch_done = True
            return messages

        # Get previous recipes for dataset files:
        ds_files = DataSetFile.objects.get_dataset_files(
            dataset.id).values_list('scale_file_id', flat=True)
        recipe_ids = RecipeInputFile.objects.filter(
            input_file_id__in=ds_files).values_list('recipe_id', flat=True)
        recipe_file_ids = RecipeInputFile.objects.filter(
            input_file_id__in=ds_files,
            recipe__recipe_type=batch.recipe_type,
            recipe__recipe_type_rev=batch.recipe_type_rev).values_list(
                'input_file_id', flat=True)
        extra_files_qry = ScaleFile.objects.filter(id__in=ds_files)

        recipe_count = 0
        # Reprocess previous recipes
        if definition.supersedes:
            if len(recipe_ids) > 0:
                # Create re-process messages for all recipes
                recipe_qry = Recipe.objects.filter(
                    id__in=recipe_ids).order_by('-id')
                if self.current_recipe_id:
                    recipe_qry = recipe_qry.filter(
                        id__lt=self.current_recipe_id)

                root_recipe_ids = []
                for recipe in recipe_qry.defer('input')[:MAX_RECIPE_NUM]:
                    root_recipe_ids.append(recipe.id)
                    self.current_recipe_id = recipe.id
                recipe_count = len(root_recipe_ids)

                if recipe_count > 0:
                    logger.info(
                        'Found %d recipe(s) from previous batch to reprocess, creating messages',
                        recipe_count)
                    msgs = create_reprocess_messages(
                        root_recipe_ids,
                        batch.recipe_type.name,
                        batch.recipe_type_rev.revision_num,
                        batch.event_id,
                        batch_id=batch.id,
                        forced_nodes=definition.forced_nodes)
                    messages.extend(msgs)

            # Filter down the extra files to exclude those we've already re-processed
            extra_files_qry = extra_files_qry.exclude(id__in=recipe_file_ids)

        # If we have data that didn't match any previous recipes
        if self.current_dataset_file_id:
            extra_files_qry = extra_files_qry.filter(
                id__lt=self.current_dataset_file_id)
        extra_file_ids = list(
            extra_files_qry.order_by('-id').values_list(
                'id', flat=True)[:(MAX_RECIPE_NUM - recipe_count)])

        if extra_file_ids:
            self.current_dataset_file_id = extra_file_ids[-1]

            logger.info(
                'Found %d files that do not have previous recipes to re-process',
                len(extra_file_ids))

            input_data = []
            for ds_file in DataSetFile.objects.get_dataset_files(
                    dataset.id).filter(scale_file__id__in=extra_file_ids):
                data = Data()
                parameter_name = ds_file.parameter_name

                # if we needed to map the inputs to parameters:
                if batch.get_configuration().input_map:
                    for param in batch.get_configuration().input_map:
                        if param['datasetParameter'] == ds_file.parameter_name:
                            parameter_name = param['input']
                            break

                data.add_value(FileValue(parameter_name, [ds_file.scale_file_id]))
                input_data.append(convert_data_to_v6_json(data).get_dict())

            msgs = create_batch_recipes_messages(
                batch.recipe_type.name,
                batch.recipe_type.revision_num,
                input_data,
                batch.event_id,
                batch_id=batch.id)
            messages.extend(msgs)
            recipe_count += len(input_data)

        if recipe_count < MAX_RECIPE_NUM:
            # Handled less than the max number of recipes, so recipes from previous batch must be done
            self.is_prev_batch_done = True

        return messages
Example #18
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`
        """

        # Check to see if a force stop was placed on this purge process
        results = PurgeResults.objects.get(trigger_event=self.trigger_id)
        if results.force_stop_purge:
            return True

        files_to_delete = ScaleFile.objects.filter_files(job_ids=[self.job_id])

        if files_to_delete:
            # Construct input data
            files = []
            workspaces = []

            for f in files_to_delete:
                files.append({
                    'id': f.id,
                    'file_path': f.file_path,
                    'workspace': f.workspace.name
                })
                if not any(f.workspace.name in wrkspc for wrkspc in workspaces):
                    workspaces.append(
                        {f.workspace.name: f.workspace.json_config})

            inputs = Data()
            inputs.add_value(JsonValue('job_id', str(self.job_id)))
            inputs.add_value(JsonValue('trigger_id', str(self.trigger_id)))
            inputs.add_value(
                JsonValue('source_file_id', str(self.source_file_id)))
            inputs.add_value(JsonValue('purge', str(self.purge)))
            inputs.add_value(JsonValue('files', json.dumps(files)))
            inputs.add_value(JsonValue('workspaces', json.dumps(workspaces)))

            # Send message to create system job to delete files
            msg = create_jobs_message(job_type_name="scale-delete-files",
                                      job_type_version="1.0.0",
                                      event_id=self.trigger_id,
                                      job_type_rev_num=1,
                                      input_data=inputs)
            self.new_messages.append(msg)

        return True
Example #19
File: test_models.py Project: sau29/scale
    def test_process_recipe_input(self):
        """Tests calling RecipeManager.process_recipe_input()"""

        date_1 = now()
        min_src_started_recipe_1 = date_1 - datetime.timedelta(days=200)
        max_src_ended_recipe_1 = date_1 + datetime.timedelta(days=200)
        date_2 = date_1 + datetime.timedelta(minutes=30)
        date_3 = date_1 + datetime.timedelta(minutes=40)
        date_4 = date_1 + datetime.timedelta(minutes=50)
        min_src_started_recipe_2 = date_1 - datetime.timedelta(days=500)
        max_src_ended_recipe_2 = date_1 + datetime.timedelta(days=500)
        s_class = 'A'
        s_sensor = '1'
        collection = '12345'
        task = 'abcd'
        workspace = storage_test_utils.create_workspace()
        file_1 = storage_test_utils.create_file(workspace=workspace, file_size=10485760.0,
                                                source_sensor_class=s_class, source_sensor=s_sensor,
                                                source_collection=collection, source_task=task)
        file_2 = storage_test_utils.create_file(workspace=workspace, file_size=104857600.0,
                                                source_started=date_2, source_ended=date_3,
                                                source_sensor_class=s_class, source_sensor=s_sensor,
                                                source_collection=collection, source_task=task)
        file_3 = storage_test_utils.create_file(workspace=workspace, file_size=987654321.0,
                                                source_started=min_src_started_recipe_1, source_ended=date_4)
        file_4 = storage_test_utils.create_file(workspace=workspace, file_size=46546.0,
                                                source_ended=max_src_ended_recipe_1)
        file_5 = storage_test_utils.create_file(workspace=workspace, file_size=83457.0, source_started=date_2)
        file_6 = storage_test_utils.create_file(workspace=workspace, file_size=42126588636633.0, source_ended=date_4)
        file_7 = storage_test_utils.create_file(workspace=workspace, file_size=76645464662354.0)
        file_8 = storage_test_utils.create_file(workspace=workspace, file_size=4654.0,
                                                source_started=min_src_started_recipe_2)
        file_9 = storage_test_utils.create_file(workspace=workspace, file_size=545.0, source_started=date_3,
                                                source_ended=max_src_ended_recipe_2)
        file_10 = storage_test_utils.create_file(workspace=workspace, file_size=0.154, source_ended=date_4,
                                                 source_sensor_class=s_class, source_sensor=s_sensor,
                                                 source_collection=collection, source_task=task)
        recipe_interface = Interface()
        recipe_interface.add_parameter(FileParameter('input_a', ['text/plain']))
        recipe_interface.add_parameter(FileParameter('input_b', ['text/plain'], multiple=True))
        definition = RecipeDefinition(recipe_interface)
        definition_dict = convert_recipe_definition_to_v6_json(definition).get_dict()
        recipe_type = recipe_test_utils.create_recipe_type_v6(definition=definition_dict)

        data_1 = Data()
        data_1.add_value(FileValue('input_a', [file_1.id]))
        data_1.add_value(FileValue('input_b', [file_2.id, file_3.id, file_4.id, file_5.id]))
        data_1_dict = convert_data_to_v6_json(data_1).get_dict()
        data_2 = Data()
        data_2.add_value(FileValue('input_a', [file_6.id]))
        data_2.add_value(FileValue('input_b', [file_7.id, file_8.id, file_9.id, file_10.id]))
        data_2_dict = convert_data_to_v6_json(data_2).get_dict()
        data_3 = Data()
        data_3_dict = convert_data_to_v6_json(data_3).get_dict()

        recipe_1 = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_1_dict)
        recipe_2 = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_2_dict)
        recipe_3 = recipe_test_utils.create_recipe(recipe_type=recipe_type, input=data_3_dict)

        # Execute method
        Recipe.objects.process_recipe_input(recipe_1)
        Recipe.objects.process_recipe_input(recipe_2)
        Recipe.objects.process_recipe_input(recipe_3)

        # Retrieve updated recipe models
        recipes = Recipe.objects.filter(id__in=[recipe_1.id, recipe_2.id, recipe_3.id]).order_by('id')
        recipe_1 = recipes[0]
        recipe_2 = recipes[1]
        recipe_3 = recipes[2]

        # Check recipes for expected fields
        self.assertEqual(recipe_1.input_file_size, 1053.0)
        self.assertEqual(recipe_1.source_started, min_src_started_recipe_1)
        self.assertEqual(recipe_1.source_ended, max_src_ended_recipe_1)
        self.assertEqual(recipe_1.source_sensor_class, s_class)
        self.assertEqual(recipe_1.source_sensor, s_sensor)
        self.assertEqual(recipe_1.source_collection, collection)
        self.assertEqual(recipe_1.source_task, task)
        self.assertEqual(recipe_2.input_file_size, 113269857.0)
        self.assertEqual(recipe_2.source_started, min_src_started_recipe_2)
        self.assertEqual(recipe_2.source_ended, max_src_ended_recipe_2)
        self.assertEqual(recipe_2.source_sensor_class, s_class)
        self.assertEqual(recipe_2.source_sensor, s_sensor)
        self.assertEqual(recipe_2.source_collection, collection)
        self.assertEqual(recipe_2.source_task, task)
        self.assertEqual(recipe_3.input_file_size, 0.0)
        self.assertIsNone(recipe_3.source_started)
        self.assertIsNone(recipe_3.source_ended)

        # Make sure recipe input file models are created
        recipe_input_files = RecipeInputFile.objects.filter(recipe_id=recipe_1.id)
        self.assertEqual(len(recipe_input_files), 5)
        input_files_dict = {'input_a': set(), 'input_b': set()}
        for recipe_input_file in recipe_input_files:
            input_files_dict[recipe_input_file.recipe_input].add(recipe_input_file.input_file_id)
        self.assertDictEqual(input_files_dict, {'input_a': {file_1.id}, 'input_b': {file_2.id, file_3.id, file_4.id,
                                                                                    file_5.id}})
        recipe_input_files = RecipeInputFile.objects.filter(recipe_id=recipe_2.id)
        self.assertEqual(len(recipe_input_files), 5)
        input_files_dict = {'input_a': set(), 'input_b': set()}
        for recipe_input_file in recipe_input_files:
            input_files_dict[recipe_input_file.recipe_input].add(recipe_input_file.input_file_id)
        self.assertDictEqual(input_files_dict, {'input_a': {file_6.id}, 'input_b': {file_7.id, file_8.id, file_9.id,
                                                                                    file_10.id}})

        self.assertEqual(RecipeInputFile.objects.filter(recipe_id=recipe_3.id).count(), 0)