Ejemplo n.º 1
0
    def test_successful_manual_kickoff(self, mock_msg_mgr):
        """Tests that manually processing an ingested source file creates one recipe of the requested type"""

        ingest = ingest_test_utils.create_ingest(source_file=self.source_file)
        definition = recipe_test_utils.RECIPE_DEFINITION
        recipe_type = recipe_test_utils.create_recipe_type_v6(definition=definition)

        # Call method to test
        handler = IngestRecipeHandler()
        handler.process_manual_ingested_source_file(ingest.id, self.source_file, now(), recipe_type.id)

        # Exactly one recipe should exist and it should be of the recipe type that was passed in
        recipes = Recipe.objects.all()
        self.assertEqual(recipes.count(), 1)
        self.assertEqual(recipes.first().recipe_type.name, recipe_type.name)
Ejemplo n.º 2
0
    def test_successful_manual_kickoff(self, mock_create, mock_msg_mgr):
        """Tests that manually processing an ingested source file calls both mocked collaborators exactly once"""

        ingest = ingest_test_utils.create_ingest(source_file=self.source_file)
        definition = recipe_test_utils.RECIPE_DEFINITION
        recipe_type = recipe_test_utils.create_recipe_type_v6(definition=definition)

        # Call method to test
        handler = IngestRecipeHandler()
        handler.process_manual_ingested_source_file(ingest.id, self.source_file, now(), recipe_type.id)

        # Each mock must have been invoked exactly one time by the handler
        mock_msg_mgr.assert_called_once()
        mock_create.assert_called_once()
Ejemplo n.º 3
0
def _complete_ingest(ingest, status):
    """Saves the final status of the given ingest and, once ingested, hands its source file to the recipe handler

    :param ingest: The ingest model
    :type ingest: :class:`ingest.models.Ingest`
    :param status: The final status of the ingest
    :type status: string
    """

    ingested = status == 'INGESTED'

    # The status (and the end time, when ingested) is saved inside a single atomic transaction
    with transaction.atomic():
        logger.info('Marking ingest for %s as %s', ingest.file_name, status)
        ingest.status = status
        if ingested:
            ingest.ingest_ended = now()
        ingest.save()

    # Recipe processing runs after the transaction block, and only for ingests that name a recipe
    if not ingested or not ingest.get_recipe_name():
        return

    handler = IngestRecipeHandler()
    handler.process_ingested_source_file(
        ingest.id, ingest.get_ingest_source_event(), ingest.source_file, ingest.ingest_ended)
Ejemplo n.º 4
0
    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method migrates existing data files into scale. An ingest record is generated for every file
        returned by :meth:`generate_file_list`; unless the ``no_commit`` option is set, each record is
        saved, a source file is created for it and the ingest is marked as INGESTED. When the ``recipe``
        option is set, each ingested source file is also passed to the ingest recipe handler.

        :returns: False if the workspace or workspace path option is missing, otherwise None
        :rtype: bool
        """
        logger.info(u'Command starting: migratedata')

        if options['workspace'] is None or options['workspace_path'] is None:
            logger.error('Must specify workspace and workspace-path.')
            return False

        workspace_path = options['workspace_path']
        # The workspace option is tried as a workspace name first and as a numeric id second
        workspace = Workspace.objects.filter(name=options['workspace']).first()
        if workspace is None:
            workspace = Workspace.objects.get(id=int(options['workspace']))

        data_types = []
        if options['data_type'] is not None:
            data_types.extend(options['data_type'])

        mnt_dirs = None
        if options['local_path'] is not None:
            local_path = options['local_path']
        else:  # mount
            mnt_dirs = "/tmp", tempfile.mkdtemp()
            workspace.setup_download_dir(*mnt_dirs)
            local_path = os.path.join(mnt_dirs[1], workspace_path)

        # Everything after the mount runs under try/finally so the download dir is always cleaned up
        try:
            logger.info("Ingesting files from %s/%s", workspace.name, workspace_path)
            filenames = self.generate_file_list(local_path, options['include'], options['exclude'])
            logger.info("Found %d files", len(filenames))

            # prepare for ingest ala strike
            ingest_records = {}
            for filename in filenames:
                logger.info("Generating ingest record for %s", filename)
                ingest = Ingest()
                ingest.file_name = os.path.basename(filename)
                ingest.file_path = os.path.join(workspace_path, os.path.relpath(filename, local_path))
                # The transfer start time is taken from the file's last access time
                ingest.transfer_started = datetime.utcfromtimestamp(os.path.getatime(filename))
                ingest.file_size = ingest.bytes_transferred = os.path.getsize(filename)
                ingest.transfer_ended = timezone.now()
                ingest.media_type = get_media_type(filename)
                ingest.workspace = workspace
                for data_type in data_types:
                    ingest.add_data_type_tag(data_type)
                ingest.status = 'TRANSFERRED'
                if options['no_commit']:
                    # Dry run: log the serialized record instead of saving it
                    s = IngestDetailsSerializerV6()
                    logger.info(s.to_representation(ingest))
                else:
                    ingest.save()
                    ingest_records[filename] = ingest.id
            logger.info("Ingests records created")

            # start ingest tasks for all the files
            if not options['no_commit']:
                logger.info("Starting ingest tasks")
                for filename in filenames:
                    ingest = Ingest.objects.get(id=ingest_records[filename])
                    logger.info("Processing ingest %s", ingest.file_name)
                    # Source file creation and the ingest status change are committed together
                    with transaction.atomic():
                        ingest.ingest_started = timezone.now()
                        sf = ingest.source_file = SourceFile.create()
                        sf.update_uuid(ingest.file_name)
                        for tag in ingest.get_data_type_tags():
                            sf.add_data_type_tag(tag)
                        sf.media_type = ingest.media_type
                        sf.file_name = ingest.file_name
                        sf.file_size = ingest.file_size
                        sf.file_path = ingest.file_path
                        sf.workspace = workspace
                        sf.is_deleted = False
                        sf.deleted = None
                        sf.save()
                        sf.set_countries()
                        sf.save()
                        ingest.status = 'INGESTED'
                        ingest.ingest_ended = timezone.now()
                        # Re-assigned after sf.save(); presumably so the relation refers to the saved row
                        ingest.source_file = sf
                        ingest.save()
                        if options['recipe']:
                            # NOTE(review): other call sites appear to pass a source/event argument between
                            # the ingest id and the source file - confirm this call matches the handler
                            IngestRecipeHandler().process_ingested_source_file(
                                ingest.id, ingest.source_file, ingest.ingest_ended)

            logger.info("Ingests processed, monitor the queue for triggered jobs.")
        finally:
            if mnt_dirs is not None:
                workspace.cleanup_download_dir(*mnt_dirs)

        logger.info(u'Command completed: migratedata')
Ejemplo n.º 5
0
    def test_successful_recipe_kickoff(self, mock_msg_mgr, mock_msg_mgr_rc, mock_msg_mgr_q):
        """Tests that ingests sourced from a strike and from a scan each kick off a recipe and record an event"""

        from ingest.models import IngestEvent

        def check_latest_event(expected_total, expected_type):
            """Asserts the total number of ingest events and the type of the event at the last expected index"""
            rows = IngestEvent.objects.all().values()
            self.assertEqual(len(rows), expected_total)
            self.assertEqual(rows[expected_total - 1]['type'], expected_type)

        def job_node(job_type):
            """Builds a dependency-free job node that feeds the recipe's INPUT_FILE into the given job type"""
            return {
                'dependencies': [],
                'input': {'INPUT_FILE': {'type': 'recipe', 'input': 'INPUT_FILE'}},
                'node_type': {
                    'node_type': 'job',
                    'job_type_name': job_type.name,
                    'job_type_version': job_type.version,
                    'job_type_revision': 1,
                },
            }

        # Create strike
        strike_config = {
            'version': '7',
            'workspace': self.workspace.name,
            'monitor': {'type': 'dir-watcher', 'transfer_suffix': '_tmp'},
            'files_to_ingest': [{
                'filename_regex': 'input_file',
                'data_types': ['image_type'],
                'new_workspace': self.workspace.name,
                'new_file_path': 'my/path',
            }],
            'recipe': {'name': self.recipe_v7.name},
        }
        strike_configuration = StrikeConfigurationV6(strike_config).get_configuration()
        strike = Strike.objects.create_strike('my_name', 'my_title', 'my_description', strike_configuration)
        ingest = ingest_test_utils.create_ingest(source_file=self.source_file)

        # Call method to test
        IngestRecipeHandler().process_ingested_source_file(ingest.id, strike, self.source_file, now())
        self.assertEqual(Recipe.objects.count(), 1)
        self.assertEqual(Recipe.objects.first().recipe_type.name, self.recipe_v7.name)

        # Verify ingest event and trigger event objects were created
        check_latest_event(1, 'STRIKE')

        # Create scan
        scan_config = {
            'workspace': self.workspace.name,
            'scanner': {'type': 'dir'},
            'files_to_ingest': [{
                'filename_regex': 'input_file',
                'data_types': ['type1'],
                'new_file_path': os.path.join('my', 'path'),
                'new_workspace': self.workspace.name,
            }],
            'recipe': {'name': self.recipe_v7.name},
        }
        scan_configuration = ScanConfigurationV6(scan_config).get_configuration()
        scan = Scan.objects.create_scan('my_name', 'my_title', 'my_description', scan_configuration)

        # Call method to test
        IngestRecipeHandler().process_ingested_source_file(ingest.id, scan, self.source_file, now())
        self.assertEqual(Recipe.objects.count(), 2)
        self.assertEqual(Recipe.objects.last().recipe_type.name, self.recipe_v7.name)

        # Verify events were created
        check_latest_event(2, 'SCAN')

        # Update the recipe then call ingest with revision 1
        manifest = job_test_utils.create_seed_manifest(
            inputs_files=[{'name': 'INPUT_FILE', 'media_types': ['text/plain'], 'required': True, 'multiple': True}],
            inputs_json=[])
        jt2 = job_test_utils.create_seed_job_type(manifest=manifest)
        definition = {
            'version': '7',
            'input': {
                'files': [{
                    'name': 'INPUT_FILE',
                    'media_types': ['text/plain'],
                    'required': True,
                    'multiple': True,
                }],
                'json': [],
            },
            'nodes': {
                'node_a': job_node(self.jt1),
                'node_b': job_node(jt2),
            },
        }

        recipe_test_utils.edit_recipe_type_v6(recipe_type=self.recipe, definition=definition)

        # The same strike configuration dict is reused with its recipe entry replaced
        strike_config['recipe'] = {
            'name': self.recipe.name,
            'revision_num': 1,
        }
        strike_configuration = StrikeConfigurationV6(strike_config).get_configuration()
        strike = Strike.objects.create_strike('my_name_2', 'my_title_2', 'my_description_2', strike_configuration)
        ingest = ingest_test_utils.create_ingest(source_file=self.source_file)

        # Call method to test
        IngestRecipeHandler().process_ingested_source_file(ingest.id, strike, self.source_file, now())
        self.assertEqual(Recipe.objects.count(), 3)
        self.assertEqual(Recipe.objects.first().recipe_type.name, self.recipe.name)

        # Verify events were created
        check_latest_event(3, 'STRIKE')