Example No. 1
    def test_successful_job_creation(self):
        """Tests successfully processing an ingest that triggers job creation."""

        # Set up data
        configuration = {
            'version': '1.0',
            'condition': {
                'media_type': 'text/plain',
                'data_types': ['type1', 'type2'],
            },
            'data': {
                'input_data_name': self.input_name,
                'workspace_name': self.workspace.name
            },
        }
        rule_model = trigger_test_utils.create_trigger_rule(
            trigger_type='INGEST', configuration=configuration)
        self.job_type_1.trigger_rule = rule_model
        self.job_type_1.save()

        # Call method to test
        IngestTriggerHandler().process_ingested_source_file(
            self.source_file, now())

        # Check results
        queue_1 = Queue.objects.get(job_type=self.job_type_1.id)
        job_exe_1 = JobExecution.objects.select_related().get(
            pk=queue_1.job_exe_id)
        job_1 = job_exe_1.job
        self.assertEqual(job_1.data['input_data'][0]['name'], self.input_name)
        self.assertEqual(job_1.data['input_data'][0]['file_id'],
                         self.source_file.id)
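
For context on the configuration above: an INGEST trigger rule only fires when the ingested source file satisfies the rule's condition block, i.e. its media type matches and it carries every listed data type tag. The snippet below is a minimal, hypothetical sketch of that matching logic; condition_matches is not part of Scale and only illustrates how the condition relates to the file being tested.

def condition_matches(condition, media_type, data_type_tags):
    """Sketch of evaluating an INGEST trigger condition (illustrative only)."""
    wanted_media_type = condition.get('media_type')
    if wanted_media_type and wanted_media_type != media_type:
        return False
    # Every data type tag named in the condition must be present on the file
    return set(condition.get('data_types', [])).issubset(data_type_tags)


# The condition from the test above matches a tagged plain-text file...
condition = {'media_type': 'text/plain', 'data_types': ['type1', 'type2']}
print(condition_matches(condition, 'text/plain', {'type1', 'type2'}))        # True
# ...but not a file with a different media type
print(condition_matches(condition, 'application/json', {'type1', 'type2'}))  # False
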
Example No. 2
    def test_successful_recipe_creation(self):
        """Tests successfully processing an ingest that triggers recipe creation."""

        # Set up data
        configuration = {
            'version': '1.0',
            'condition': {
                'media_type': 'text/plain',
            },
            'data': {
                'input_data_name': self.input_name,
                'workspace_name': self.workspace.name
            },
        }

        rule_model = trigger_test_utils.create_trigger_rule(
            trigger_type='INGEST', configuration=configuration)
        self.recipe_type_1.trigger_rule = rule_model
        self.recipe_type_1.save()

        # Call method to test
        IngestTriggerHandler().process_ingested_source_file(
            self.source_file, now())

        # Check results...ensure first job is queued
        queue_1 = Queue.objects.get(job_type=self.job_type_2.id)
        job_1 = Job.objects.get(id=queue_1.job_id)
        self.assertEqual(job_1.data['input_data'][0]['name'], self.input_name)
        self.assertEqual(job_1.data['input_data'][0]['file_id'],
                         self.source_file.id)
        self.assertEqual(job_1.data['output_data'][0]['name'],
                         self.output_name)
        self.assertEqual(job_1.data['output_data'][0]['workspace_id'],
                         self.workspace.id)
Example No. 3
    def ready(self):
        """
        Override this method in subclasses to run code when Django starts.
        """

        # Register ingest job type timeout error
        from job.execution.tasks.job_task import JOB_TYPE_TIMEOUT_ERRORS
        JOB_TYPE_TIMEOUT_ERRORS['scale-ingest'] = 'ingest-timeout'

        from ingest.triggers.ingest_trigger_handler import IngestTriggerHandler
        from trigger.handler import register_trigger_rule_handler

        # Register ingest trigger rule handler
        register_trigger_rule_handler(IngestTriggerHandler())

        # Registers the Strike monitors with the monitor system
        import ingest.strike.monitors.factory as factory
        from ingest.strike.monitors.dir_monitor import DirWatcherMonitor
        from ingest.strike.monitors.s3_monitor import S3Monitor

        # Register monitor types
        factory.add_monitor_type(DirWatcherMonitor)
        factory.add_monitor_type(S3Monitor)

        # Registers the scanners with the Scan system
        import ingest.scan.scanners.factory as factory
        from ingest.scan.scanners.dir_scanner import DirScanner
        from ingest.scan.scanners.s3_scanner import S3Scanner

        # Register scanner types
        factory.add_scanner_type(DirScanner)
        factory.add_scanner_type(S3Scanner)
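
The factory calls above rely on a simple registry pattern: each monitor or scanner class is registered under a type name so Strike and Scan can later instantiate the right class from configuration. The sketch below is a hypothetical, self-contained illustration of that pattern; the registry, the stand-in monitor class, and its monitor_type attribute are assumptions, not Scale's actual factory API.

# Hypothetical registry illustrating the add_*_type() pattern used above
_MONITOR_TYPES = {}


class FakeDirMonitor(object):
    """Stand-in for a monitor class; the real ones live in ingest.strike.monitors."""
    monitor_type = 'dir-watcher'


def add_monitor_type(monitor_class):
    """Registers a monitor class under its declared type name."""
    _MONITOR_TYPES[monitor_class.monitor_type] = monitor_class


def get_monitor(monitor_type):
    """Instantiates the registered monitor class for the given type name."""
    return _MONITOR_TYPES[monitor_type]()


add_monitor_type(FakeDirMonitor)
print(type(get_monitor('dir-watcher')).__name__)  # FakeDirMonitor
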
Example No. 4
    def ready(self):
        """
        Override this method in subclasses to run code when Django starts.
        """

        from ingest.triggers.ingest_trigger_handler import IngestTriggerHandler
        from trigger.handler import register_trigger_rule_handler

        # Register ingest trigger rule handler
        register_trigger_rule_handler(IngestTriggerHandler())
Example No. 5
    def ready(self):
        """
        Override this method in subclasses to run code when Django starts.
        """

        from ingest.cleanup import IngestJobExecutionCleaner
        from ingest.strike.cleanup import StrikeJobExecutionCleaner
        from ingest.triggers.ingest_trigger_handler import IngestTriggerHandler
        from job.execution.cleanup import REGISTERED_CLEANERS
        from trigger.handler import register_trigger_rule_handler

        # Register job execution cleaners for ingest and Strike jobs
        REGISTERED_CLEANERS['scale-ingest'] = IngestJobExecutionCleaner()
        REGISTERED_CLEANERS['scale-strike'] = StrikeJobExecutionCleaner()

        # Register ingest trigger rule handler
        register_trigger_rule_handler(IngestTriggerHandler())
Example No. 6
def _complete_ingest(ingest, status):
    """Completes the given ingest in an atomic transaction

    :param ingest: The ingest model
    :type ingest: :class:`ingest.models.Ingest`
    :param status: The final status of the ingest
    :type status: string
    """

    # Atomically mark ingest status and run ingest trigger rules
    with transaction.atomic():
        logger.info('Marking ingest for %s as %s', ingest.file_name, status)
        ingest.status = status
        if status == 'INGESTED':
            ingest.ingest_ended = now()
        ingest.save()
        if status == 'INGESTED':
            IngestTriggerHandler().process_ingested_source_file(
                ingest.source_file, ingest.ingest_ended)
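
Because the status change and the trigger processing share one transaction.atomic() block, trigger rules only queue jobs or recipes if the INGESTED status actually commits. A hypothetical caller is sketched below; _do_ingest_work() and the 'ERRORED' fallback status value are assumptions used purely for illustration, not Scale's real ingest task code.

# Hypothetical sketch of a caller of _complete_ingest() above
def perform_ingest(ingest):
    try:
        _do_ingest_work(ingest)
    except Exception:
        _complete_ingest(ingest, 'ERRORED')  # assumed failure status value
        raise
    _complete_ingest(ingest, 'INGESTED')     # commits status and fires trigger rules


def _do_ingest_work(ingest):
    """Stand-in for moving the file into its workspace (illustrative only)."""
    pass
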
Example No. 7
    def ready(self):
        """
        Override this method in subclasses to run code when Django starts.
        """

        from ingest.triggers.ingest_trigger_handler import IngestTriggerHandler
        from trigger.handler import register_trigger_rule_handler

        # Register ingest trigger rule handler
        register_trigger_rule_handler(IngestTriggerHandler())

        # Registers the Strike monitors with the monitor system
        import ingest.strike.monitors.factory as factory
        from ingest.strike.monitors.dir_monitor import DirWatcherMonitor
        from ingest.strike.monitors.s3_monitor import S3Monitor

        # Register monitor types
        factory.add_monitor_type(DirWatcherMonitor)
        factory.add_monitor_type(S3Monitor)
Example No. 8
def _complete_ingest(ingest, status, source_file):
    """Completes the given ingest by marking its final status in an atomic transaction

    :param ingest: The ingest model
    :type ingest: :class:`ingest.models.Ingest`
    :param status: The final status of the ingest
    :type status: str
    :param source_file: The model of the source file that was ingested
    :type source_file: :class:`source.models.SourceFile`
    """

    # TODO: future refactor: this will also be responsible for saving the source file model

    # Atomically mark ingest status and run ingest trigger rules
    with transaction.atomic():
        logger.info('Marking ingest %i as %s', ingest.id, status)
        ingest.source_file = source_file
        ingest.status = status
        if status == 'INGESTED':
            ingest.ingest_ended = timezone.now()
        ingest.save()
        if status == 'INGESTED':
            IngestTriggerHandler().process_ingested_source_file(ingest.source_file, ingest.ingest_ended)
Example No. 9
    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method migrates existing data files into scale.
        """
        logger.info(u'Command starting: migratedata')

        workspace, workspace_path, local_path, data_types = None, None, None, []
        if options['workspace'] is not None and options['workspace_path'] is not None:
            workspace = options['workspace']
            workspace_path = options['workspace_path']
            tmp = Workspace.objects.filter(name=workspace)
            if tmp.count() > 0:
                workspace = tmp.first()
            else:
                workspace = Workspace.objects.get(id=int(workspace))
        else:
            logger.error('Must specify workspace and workspace-path.')
            return False
        if options['data_type'] is not None:
            data_types.extend(options['data_type'])

        mnt_dirs = None
        if options['local_path'] is not None:
            local_path = options['local_path']
        else:  # mount
            mnt_dirs = "/tmp", tempfile.mkdtemp()
            workspace.setup_download_dir(*mnt_dirs)
            local_path = os.path.join(mnt_dirs[1], workspace_path)

        logger.info("Ingesting files from %s/%s", workspace.name,
                    workspace_path)
        filenames = self.generate_file_list(local_path, options['include'],
                                            options['exclude'])
        logger.info("Found %d files", len(filenames))

        # prepare for ingest, a la Strike
        ingest_records = {}
        for filename in filenames:
            logger.info("Generating ingest record for %s", filename)
            ingest = Ingest()
            ingest.file_name = os.path.basename(filename)
            ingest.file_path = os.path.join(
                workspace_path, os.path.relpath(filename, local_path))
            ingest.transfer_started = datetime.utcfromtimestamp(
                os.path.getatime(filename))
            ingest.file_size = ingest.bytes_transferred = os.path.getsize(
                filename)
            ingest.transfer_ended = timezone.now()
            ingest.media_type = get_media_type(filename)
            ingest.workspace = workspace
            for data_type in data_types:
                ingest.add_data_type_tag(data_type)
            ingest.status = 'TRANSFERRED'
            if options['no_commit']:
                s = IngestDetailsSerializer()
                logger.info(s.to_representation(ingest))
            else:
                ingest.save()
                ingest_records[filename] = ingest.id
        logger.info("Ingest records created")

        # start ingest tasks for all the files
        if not options['no_commit']:
            logger.info("Starting ingest tasks")
            for filename in filenames:
                ingest = Ingest.objects.get(id=ingest_records[filename])
                logger.info("Processing ingest %s", ingest.file_name)
                with transaction.atomic():
                    ingest.ingest_started = timezone.now()
                    sf = ingest.source_file = SourceFile.create()
                    sf.update_uuid(ingest.file_name)
                    for tag in ingest.get_data_type_tags():
                        sf.add_data_type_tag(tag)
                    sf.media_type = ingest.media_type
                    sf.file_name = ingest.file_name
                    sf.file_size = ingest.file_size
                    sf.file_path = ingest.file_path
                    sf.workspace = workspace
                    sf.is_deleted = False
                    sf.deleted = None
                    sf.save()
                    sf.set_countries()
                    sf.save()
                    ingest.status = 'INGESTED'
                    ingest.ingest_ended = timezone.now()
                    ingest.source_file = sf
                    ingest.save()
                    IngestTriggerHandler().process_ingested_source_file(
                        ingest.source_file, ingest.ingest_ended)

        logger.info("Ingests processed, monitor the queue for triggered jobs.")

        if mnt_dirs is not None:
            workspace.cleanup_download_dir(*mnt_dirs)

        logger.info(u'Command completed: migratedata')
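
The handle() method above reads workspace, workspace_path, local_path, data_type, include, exclude and no_commit from its options dictionary. The sketch below shows a hypothetical dry-run invocation through Django's call_command(); the keyword names mirror the keys read in handle(), but the example values are placeholders, and the actual command-line flag spellings are defined in the command's add_arguments(), which is not shown here.

# Hypothetical dry-run invocation; values are placeholders, not real data.
from django.core.management import call_command

call_command(
    'migratedata',
    workspace='raw-data',            # assumed workspace name or id
    workspace_path='incoming/2016',  # path within the workspace to scan
    local_path=None,                 # let the command mount the workspace itself
    data_type=['migrated'],          # tags applied to every ingested file
    include=['*.h5'],                # assumed include pattern
    exclude=None,
    no_commit=True,                  # serialize ingest records without saving them
)
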