Example #1
0
def perform_ingest(ingest_id, mount):
    """Performs the ingest for the given ingest ID

    Mounts the given NFS file system, transitions the ingest through its
    status values, stores the file into its workspace, and finally cleans up
    the job execution.

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: int
    :param mount: The file system to mount in the form of host:/dir/path
    :type mount: string
    """

    # TODO: refactor to combine _get_ingest(), _get_job_exe_id(), and _set_ingesting_status() in one database
    # transaction with as few queries as possible, include retries
    ingest = _get_ingest(ingest_id)
    job_exe_id = _get_job_exe_id(ingest)
    # Make sure the local mount point exists before mounting the remote file system
    if not os.path.exists(SCALE_INGEST_MOUNT_PATH):
        logger.info('Creating %s', SCALE_INGEST_MOUNT_PATH)
        os.makedirs(SCALE_INGEST_MOUNT_PATH, mode=0755)
    # Duplicate files are moved into a 'duplicate' sub-directory instead of being deleted
    dup_path = os.path.join(SCALE_INGEST_MOUNT_PATH, 'duplicate',
                            ingest.file_name)
    ingest_path = os.path.join(SCALE_INGEST_MOUNT_PATH, ingest.ingest_path)
    # Mount read-write: the ingest file is deleted or moved below
    nfs_mount(mount, SCALE_INGEST_MOUNT_PATH, read_only=False)

    try:
        # Check condition of the ingest (None indicates this ingest should be skipped)
        ingest = _set_ingesting_status(ingest, ingest_path, dup_path)
        if ingest is None:
            return

        logger.info('Storing %s into %s on %s', ingest_path, ingest.file_path,
                    ingest.workspace.name)
        try:
            # TODO: future refactor: before copying file, grab existing source file (no lock) or create and save model
            # This guarantees that source file exists and can be used to check if file is duplicate
            # After this step, the source file should be marked as is_deleted so that it can't be used yet
            src_file = SourceFile.objects.store_file(
                ingest_path, ingest.get_data_type_tags(), ingest.workspace,
                ingest.file_path)

            _complete_ingest(ingest, 'INGESTED', src_file)
            # Stored successfully, so the local copy is no longer needed
            _delete_ingest_file(ingest_path)
            logger.info('Ingest successful: %s', ingest_path)
        except DuplicateFile:
            logger.warning('Duplicate file detected: %i',
                           ingest_id,
                           exc_info=True)
            # TODO: future refactor: pass source file model in so source files have duplicate ingests tied to them
            _complete_ingest(ingest, 'DUPLICATE', None)
            # Keep the duplicate file around for inspection rather than deleting it
            _move_ingest_file(ingest_path, dup_path)
        except Exception:
            # TODO: have this delete the stored source file using some SourceFile.objects.delete_file method
            # TODO: future refactor: pass source file model in so source files have errored ingests tied to them
            # TODO: change ERRORED to FAILED
            _complete_ingest(ingest, 'ERRORED', None)
            raise  # File remains where it is so it can be processed again
    finally:
        # Always unmount, even if the ingest failed
        nfs_umount(SCALE_INGEST_MOUNT_PATH)

    try:
        cleanup_job_exe(job_exe_id)
    except Exception:
        # A cleanup failure should not fail an otherwise successful ingest
        logger.exception('Job Execution %i: Error cleaning up', job_exe_id)
Example #2
0
    def move_files(self, work_dir, files_to_move):
        """See :meth:`storage.brokers.broker.Broker.move_files`
        """

        nfs_mount(self.mount, work_dir, False)
        try:
            for file_to_move in files_to_move:
                old_workspace_path = file_to_move[0]
                new_workspace_path = file_to_move[1]

                full_old_workspace_path = os.path.join(work_dir,
                                                       old_workspace_path)
                full_new_workspace_path = os.path.join(work_dir,
                                                       new_workspace_path)
                full_new_workspace_dir = os.path.dirname(
                    full_new_workspace_path)

                if not os.path.exists(full_new_workspace_dir):
                    logger.info('Creating %s', full_new_workspace_dir)
                    os.makedirs(full_new_workspace_dir, mode=0755)

                logger.info('Moving %s to %s', full_old_workspace_path,
                            full_new_workspace_path)
                shutil.move(full_old_workspace_path, full_new_workspace_path)
                os.chmod(full_new_workspace_path, 0644)
        finally:
            nfs_umount(work_dir)
Example #3
0
def perform_ingest(ingest_id, mount):
    """Performs the ingest for the given ingest ID

    Mounts the given NFS file system, transitions the ingest through its
    status values, stores the file into its workspace, and finally cleans up
    the job execution.

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: int
    :param mount: The file system to mount in the form of host:/dir/path
    :type mount: string
    """

    # TODO: refactor to combine _get_ingest(), _get_job_exe_id(), and _set_ingesting_status() in one database
    # transaction with as few queries as possible, include retries
    ingest = _get_ingest(ingest_id)
    job_exe_id = _get_job_exe_id(ingest)
    # Make sure the local mount point exists before mounting the remote file system
    if not os.path.exists(SCALE_INGEST_MOUNT_PATH):
        logger.info('Creating %s', SCALE_INGEST_MOUNT_PATH)
        os.makedirs(SCALE_INGEST_MOUNT_PATH, mode=0755)
    # Duplicate files are moved into a 'duplicate' sub-directory instead of being deleted
    dup_path = os.path.join(SCALE_INGEST_MOUNT_PATH, 'duplicate', ingest.file_name)
    ingest_path = os.path.join(SCALE_INGEST_MOUNT_PATH, ingest.ingest_path)
    # Mount read-write: the ingest file is deleted or moved below
    nfs_mount(mount, SCALE_INGEST_MOUNT_PATH, read_only=False)

    try:
        # Check condition of the ingest (None indicates this ingest should be skipped)
        ingest = _set_ingesting_status(ingest, ingest_path, dup_path)
        if ingest is None:
            return

        logger.info('Storing %s into %s on %s', ingest_path, ingest.file_path, ingest.workspace.name)
        try:
            # TODO: future refactor: before copying file, grab existing source file (no lock) or create and save model
            # This guarantees that source file exists and can be used to check if file is duplicate
            # After this step, the source file should be marked as is_deleted so that it can't be used yet
            src_file = SourceFile.objects.store_file(ingest_path, ingest.get_data_type_tags(), ingest.workspace,
                                                     ingest.file_path)

            _complete_ingest(ingest, 'INGESTED', src_file)
            # Stored successfully, so the local copy is no longer needed
            _delete_ingest_file(ingest_path)
            logger.info('Ingest successful: %s', ingest_path)
        except DuplicateFile:
            logger.warning('Duplicate file detected: %i', ingest_id, exc_info=True)
            # TODO: future refactor: pass source file model in so source files have duplicate ingests tied to them
            _complete_ingest(ingest, 'DUPLICATE', None)
            # Keep the duplicate file around for inspection rather than deleting it
            _move_ingest_file(ingest_path, dup_path)
        except Exception:
            # TODO: have this delete the stored source file using some SourceFile.objects.delete_file method
            # TODO: future refactor: pass source file model in so source files have errored ingests tied to them
            # TODO: change ERRORED to FAILED
            _complete_ingest(ingest, 'ERRORED', None)
            raise  # File remains where it is so it can be processed again
    finally:
        # Always unmount, even if the ingest failed
        nfs_umount(SCALE_INGEST_MOUNT_PATH)

    try:
        cleanup_job_exe(job_exe_id)
    except Exception:
        # A cleanup failure should not fail an otherwise successful ingest
        logger.exception('Job Execution %i: Error cleaning up', job_exe_id)
Example #4
0
    def delete_files(self, work_dir, workspace_paths):
        '''See :meth:`storage.brokers.broker.Broker.delete_files`
        '''

        nfs_mount(self.mount, work_dir, False)
        try:
            for rel_path in workspace_paths:
                target = os.path.join(work_dir, rel_path)
                # Quietly skip paths that are already gone
                if not os.path.exists(target):
                    continue
                logger.info('Deleting %s', target)
                os.remove(target)
        finally:
            # Always unmount the work directory, even on failure
            nfs_umount(work_dir)
Example #5
0
    def delete_files(self, work_dir, workspace_paths):
        """See :meth:`storage.brokers.broker.Broker.delete_files`
        """

        nfs_mount(self.mount, work_dir, False)
        try:
            for rel_path in workspace_paths:
                target = os.path.join(work_dir, rel_path)
                # Quietly skip paths that are already gone
                if not os.path.exists(target):
                    continue
                logger.info('Deleting %s', target)
                os.remove(target)
        finally:
            # Always unmount the work directory, even on failure
            nfs_umount(work_dir)
Example #6
0
    def mount_and_process_dir(self):
        '''Mounts NFS and processes the current files in the Strike directory

        All errors are logged and swallowed; this method never raises.
        '''

        try:
            # Make sure the local mount point exists before mounting
            if not os.path.exists(self.strike_dir):
                logger.info('Creating %s', self.strike_dir)
                os.makedirs(self.strike_dir, mode=0755)
            # Mount read-write: processing modifies the Strike directory contents
            nfs_mount(self.mount, self.strike_dir, read_only=False)
            self._init_dirs()
            self._process_dir()
        except Exception:
            # Log and swallow so the error does not propagate to the caller
            logger.exception('Strike processor encountered error.')
        finally:
            # Always unmount, even after an error
            nfs_umount(self.strike_dir)
Example #7
0
    def mount_and_process_dir(self):
        '''Mounts NFS and processes the current files in the Strike directory

        All errors are logged and swallowed; this method never raises.
        '''

        try:
            # Make sure the local mount point exists before mounting
            if not os.path.exists(self.strike_dir):
                logger.info('Creating %s', self.strike_dir)
                os.makedirs(self.strike_dir, mode=0755)
            # Mount read-write: processing modifies the Strike directory contents
            nfs_mount(self.mount, self.strike_dir, read_only=False)
            self._init_dirs()
            self._process_dir()
        except Exception:
            # Log and swallow so the error does not propagate to the caller
            logger.exception('Strike processor encountered error.')
        finally:
            # Always unmount, even after an error
            nfs_umount(self.strike_dir)
Example #8
0
    def upload_files(self, upload_dir, work_dir, files_to_upload):
        '''See :meth:`storage.brokers.broker.Broker.setup_upload_dir`
        '''

        nfs_mount(self.mount, work_dir, False)
        try:
            for file_to_upload in files_to_upload:
                src_path = file_to_upload[0]
                workspace_path = file_to_upload[1]

                full_src_path = os.path.join(upload_dir, src_path)
                full_workspace_path = os.path.join(work_dir, workspace_path)
                full_workspace_dir = os.path.dirname(full_workspace_path)

                if not os.path.exists(full_workspace_dir):
                    logger.info('Creating %s', full_workspace_dir)
                    os.makedirs(full_workspace_dir, mode=0755)
                self._copy_file(full_src_path, full_workspace_path)
        finally:
            nfs_umount(work_dir)
Example #9
0
    def upload_files(self, upload_dir, work_dir, files_to_upload):
        """See :meth:`storage.brokers.broker.Broker.setup_upload_dir`
        """

        nfs_mount(self.mount, work_dir, False)
        try:
            for file_to_upload in files_to_upload:
                src_path = file_to_upload[0]
                workspace_path = file_to_upload[1]

                full_src_path = os.path.join(upload_dir, src_path)
                full_workspace_path = os.path.join(work_dir, workspace_path)
                full_workspace_dir = os.path.dirname(full_workspace_path)

                if not os.path.exists(full_workspace_dir):
                    logger.info('Creating %s', full_workspace_dir)
                    os.makedirs(full_workspace_dir, mode=0755)
                self._copy_file(full_src_path, full_workspace_path)
                os.chmod(full_workspace_path, 0644)
        finally:
            nfs_umount(work_dir)
Example #10
0
    def move_files(self, work_dir, files_to_move):
        '''See :meth:`storage.brokers.broker.Broker.move_files`
        '''

        nfs_mount(self.mount, work_dir, False)
        try:
            for file_to_move in files_to_move:
                old_workspace_path = file_to_move[0]
                new_workspace_path = file_to_move[1]

                full_old_workspace_path = os.path.join(work_dir, old_workspace_path)
                full_new_workspace_path = os.path.join(work_dir, new_workspace_path)
                full_new_workspace_dir = os.path.dirname(full_new_workspace_path)

                if not os.path.exists(full_new_workspace_dir):
                    logger.info('Creating %s', full_new_workspace_dir)
                    os.makedirs(full_new_workspace_dir, mode=0755)

                logger.info('Moving %s to %s', full_old_workspace_path, full_new_workspace_path)
                shutil.move(full_old_workspace_path, full_new_workspace_path)
        finally:
            nfs_umount(work_dir)
Example #11
0
    def setup_download_dir(self, download_dir, work_dir):
        '''See :meth:`storage.brokers.broker.Broker.setup_download_dir`
        '''

        # Mount read-only: downloads only read from the workspace
        nfs_mount(self.mount, work_dir, read_only=True)
Example #12
0
def perform_ingest(ingest_id, mount):
    '''Performs the ingest for the given ingest ID

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: long
    :param mount: The file system to mount in the form of host:/dir/path
    :type mount: str
    '''

    job_exe_id = None
    upload_work_dir = None
    try:
        ingest = Ingest.objects.select_related().get(id=ingest_id)
        job_exe_id = JobExecution.objects.get_latest([ingest.job])[ingest.job.id].id
        ingest_work_dir = get_ingest_work_dir(job_exe_id)
        dup_path = os.path.join(ingest_work_dir, 'duplicate', ingest.file_name)
        ingest_path = os.path.join(ingest_work_dir, ingest.ingest_path)
        upload_work_dir = os.path.join(os.path.dirname(ingest_path), 'upload', str(ingest_id))
        if not os.path.exists(ingest_work_dir):
            logger.info('Creating %s', ingest_work_dir)
            os.makedirs(ingest_work_dir, mode=0755)
        nfs_mount(mount, ingest_work_dir, read_only=False)

        # Check condition of the ingest
        ingest = _set_ingesting_status(ingest, ingest_path, dup_path)
        if ingest is None:
            return

        logger.info('Storing %s into %s on %s', ingest_path, ingest.file_path, ingest.workspace.name)
        try:
            src_file = SourceFile.objects.store_file(upload_work_dir, ingest_path, ingest.get_data_type_tags(),
                                                     ingest.workspace, ingest.file_path)
            # Atomically store file, mark INGESTED, and run ingest trigger rules
            with transaction.atomic():
                # TODO: It's possible that the file will be successfully moved into the workspace but this database
                # transaction might fail. This will result in a file that is in a workspace but doesn't have database
                # entries. Attempts to re-ingest will result in duplicate file errors.
                logger.info('Marking file as INGESTED: %i', ingest_id)
                ingest.source_file = src_file
                ingest.status = 'INGESTED'
                ingest.ingest_ended = timezone.now()
                ingest.save()
                logger.debug('Checking ingest trigger rules')
                for ingest_rule in get_ingest_rules():
                    ingest_rule.process_ingest(ingest, src_file.id)

            # Delete ingest file
            _delete_ingest_file(ingest_path)
            logger.info('Ingest successful: %s', ingest_path)
        except DuplicateFile:
            logger.warning('Duplicate file detected: %i', ingest_id, exc_info=True)
            ingest.status = 'DUPLICATE'
            ingest.save()
            _move_ingest_file(ingest_path, dup_path)
        except Exception:
            # TODO: have this delete the stored source file using some SourceFile.objects.delete_file method
            ingest.status = 'ERRORED'
            ingest.save()
            raise  # File remains where it is so it can be processed again
    finally:
        try:
            if upload_work_dir and os.path.exists(upload_work_dir):
                logger.info('Deleting %s', upload_work_dir)
                shutil.rmtree(upload_work_dir)
        except:
            # Swallow exception so error from main try block isn't covered up
            logger.exception('Failed to delete upload work dir %s', upload_work_dir)

        if job_exe_id:
            cleanup_job_exe(job_exe_id)
Example #13
0
def perform_ingest(ingest_id, mount):
    '''Performs the ingest for the given ingest ID

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: long
    :param mount: The file system to mount in the form of host:/dir/path
    :type mount: str
    '''

    job_exe_id = None
    upload_work_dir = None
    try:
        # TODO: refactor to combine _get_ingest(), _get_job_exe_id(), and _set_ingesting_status() in one database
        # transaction with as few queries as possible, include retries
        ingest = _get_ingest(ingest_id)
        job_exe_id = _get_job_exe_id(ingest)
        create_job_exe_dir(job_exe_id)
        ingest_work_dir = get_ingest_work_dir(job_exe_id)
        dup_path = os.path.join(ingest_work_dir, 'duplicate', ingest.file_name)
        ingest_path = os.path.join(ingest_work_dir, ingest.ingest_path)
        upload_work_dir = os.path.join(os.path.dirname(ingest_path), 'upload', str(ingest_id))
        if not os.path.exists(ingest_work_dir):
            logger.info('Creating %s', ingest_work_dir)
            os.makedirs(ingest_work_dir, mode=0755)
        nfs_mount(mount, ingest_work_dir, read_only=False)
        if not os.path.exists(upload_work_dir):
            logger.info('Creating %s', upload_work_dir)
            os.makedirs(upload_work_dir, mode=0755)

        # Check condition of the ingest
        ingest = _set_ingesting_status(ingest, ingest_path, dup_path)
        if ingest is None:
            return

        logger.info('Storing %s into %s on %s', ingest_path, ingest.file_path, ingest.workspace.name)
        try:
            # TODO: future refactor: before copying file, grab existing source file (no lock) or create and save model
            # This guarantees that source file exists and can be used to check if file is duplicate
            # After this step, the source file should be marked as is_deleted so that it can't be used yet
            src_file = SourceFile.objects.store_file(upload_work_dir, ingest_path, ingest.get_data_type_tags(),
                                                     ingest.workspace, ingest.file_path)

            _complete_ingest(ingest, 'INGESTED', src_file)
            _delete_ingest_file(ingest_path)
            logger.info('Ingest successful: %s', ingest_path)
        except DuplicateFile:
            logger.warning('Duplicate file detected: %i', ingest_id, exc_info=True)
            # TODO: future refactor: pass source file model in so source files have duplicate ingests tied to them
            _complete_ingest(ingest, 'DUPLICATE', None)
            _move_ingest_file(ingest_path, dup_path)
        except Exception:
            # TODO: have this delete the stored source file using some SourceFile.objects.delete_file method
            # TODO: future refactor: pass source file model in so source files have errored ingests tied to them
            # TODO: change ERRORED to FAILED
            _complete_ingest(ingest, 'ERRORED', None)
            raise  # File remains where it is so it can be processed again
    finally:
        try:
            # Try to clean up the upload directory
            if upload_work_dir and os.path.exists(upload_work_dir):
                upload_dir = os.path.join(upload_work_dir, 'upload')
                workspace_work_dir = os.path.join(upload_work_dir, 'work')
                if os.path.exists(workspace_work_dir):
                    ScaleFile.objects.cleanup_upload_dir(upload_dir, workspace_work_dir, ingest.workspace)
                    logger.info('Deleting %s', workspace_work_dir)
                    os.rmdir(workspace_work_dir)
                if os.path.exists(upload_dir):
                    logger.info('Deleting %s', upload_dir)
                    # Delete everything in upload dir
                    shutil.rmtree(upload_dir)
                logger.info('Deleting %s', upload_work_dir)
                os.rmdir(upload_work_dir)
        except:
            # Swallow exception so error from main try block isn't covered up
            logger.exception('Failed to delete upload work dir %s', upload_work_dir)

    try:
        if job_exe_id:
            cleanup_job_exe(job_exe_id)
    except Exception:
        logger.exception('Job Execution %i: Error cleaning up', job_exe_id)
Example #14
0
    def setup_download_dir(self, download_dir, work_dir):
        """See :meth:`storage.brokers.broker.Broker.setup_download_dir`
        """

        # Mount read-only: downloads only read from the workspace
        nfs_mount(self.mount, work_dir, read_only=True)