Example #1
0
    def test_successfully(self, mock_execute, mock_exists):
        """Tests that HostBroker.download_files() issues one 'ln -s' command per requested file"""

        # mock_exists is presumably a patched path-existence check (confirm against the decorators); it always
        # answers True here
        mock_exists.return_value = True

        volume_path = os.path.join('the', 'volume', 'path')
        txt_name, json_name = 'my_file.txt', 'my_file.json'

        # Local destinations handed to the broker
        txt_dest = os.path.join('my_dir_1', txt_name)
        json_dest = os.path.join('my_dir_2', json_name)

        # Workspace-relative paths stored on the file models, and the same paths joined onto the volume path
        txt_wksp_path = os.path.join('my_wrk_dir_1', txt_name)
        json_wksp_path = os.path.join('my_wrk_dir_2', json_name)
        txt_source = os.path.join(volume_path, txt_wksp_path)
        json_source = os.path.join(volume_path, json_wksp_path)

        txt_file = storage_test_utils.create_file(file_path=txt_wksp_path)
        json_file = storage_test_utils.create_file(file_path=json_wksp_path)
        downloads = [
            FileDownload(txt_file, txt_dest, False),
            FileDownload(json_file, json_dest, False),
        ]

        # Call method to test
        self.broker.download_files(volume_path, downloads)

        # Check results: the executed commands must include both links, in request order
        mock_execute.assert_has_calls([
            call(['ln', '-s', txt_source, txt_dest]),
            call(['ln', '-s', json_source, json_dest]),
        ])
Example #2
0
    def test_success(self):
        """Tests that ScaleFileManager.download_files() passes each workspace only its own files"""

        # First workspace owns three files
        first_workspace = storage_test_utils.create_workspace()
        first_files = [storage_test_utils.create_file(workspace=first_workspace) for _ in range(3)]
        first_paths = [
            '/my/local/path/file.txt',
            '/another/local/path/file.txt',
            '/another/local/path/file.json',
        ]
        first_workspace.setup_download_dir = MagicMock()
        first_workspace.download_files = MagicMock()

        # Second workspace owns two files
        second_workspace = storage_test_utils.create_workspace()
        second_files = [storage_test_utils.create_file(workspace=second_workspace) for _ in range(2)]
        second_paths = [
            '/my/local/path/4/file.txt',
            '/another/local/path/5/file.txt',
        ]
        second_workspace.setup_download_dir = MagicMock()
        second_workspace.download_files = MagicMock()

        first_pairs = list(zip(first_files, first_paths))
        second_pairs = list(zip(second_files, second_paths))

        # Request everything in a single call, first workspace's files ahead of the second's
        requested = [FileDownload(scale_file, path, False) for scale_file, path in first_pairs + second_pairs]
        ScaleFile.objects.download_files(requested)

        # Each workspace must have been invoked exactly once, with just its own downloads
        first_workspace.download_files.assert_called_once_with(
            [FileDownload(scale_file, path, False) for scale_file, path in first_pairs])
        second_workspace.download_files.assert_called_once_with(
            [FileDownload(scale_file, path, False) for scale_file, path in second_pairs])
Example #3
0
    def test_deleted_file(self):
        """Tests that ScaleFileManager.download_files() raises DeletedFile when a requested file is deleted"""

        first_workspace = storage_test_utils.create_workspace()
        intact_file_a = storage_test_utils.create_file(workspace=first_workspace)

        # The second file of the first workspace is flagged as deleted and persisted that way
        removed_file = storage_test_utils.create_file(workspace=first_workspace)
        removed_file.is_deleted = True
        removed_file.save()

        intact_file_b = storage_test_utils.create_file(workspace=first_workspace)
        first_workspace.download_files = MagicMock()

        second_workspace = storage_test_utils.create_workspace()
        other_file_a = storage_test_utils.create_file(workspace=second_workspace)
        other_file_b = storage_test_utils.create_file(workspace=second_workspace)
        second_workspace.download_files = MagicMock()

        requested = [
            FileDownload(intact_file_a, '/my/local/path/file.txt', False),
            FileDownload(removed_file, '/another/local/path/file.txt', False),
            FileDownload(intact_file_b, '/another/local/path/file.json', False),
            FileDownload(other_file_a, '/my/local/path/4/file.txt', False),
            FileDownload(other_file_b, '/another/local/path/5/file.txt', False),
        ]

        with self.assertRaises(DeletedFile):
            ScaleFile.objects.download_files(requested)
Example #4
0
    def test_download_files(self, mock_client_class):
        """Tests that the broker calls download_file on the S3 object of every requested file"""

        # The mocked client returns one distinct S3 object per get_object() call, in this order
        s3_objects = [MagicMock(), MagicMock()]
        mock_client = MagicMock(S3Client)
        mock_client.get_object.side_effect = s3_objects
        # Entering the patched client class as a context manager yields the mocked client
        mock_client_class.return_value.__enter__ = Mock(return_value=mock_client)

        txt_name, json_name = 'my_file.txt', 'my_file.json'
        txt_dest = os.path.join('my_dir_1', txt_name)
        json_dest = os.path.join('my_dir_2', json_name)

        txt_file = storage_test_utils.create_file(file_path=os.path.join('my_wrk_dir_1', txt_name))
        json_file = storage_test_utils.create_file(file_path=os.path.join('my_wrk_dir_2', json_name))
        downloads = [FileDownload(txt_file, txt_dest), FileDownload(json_file, json_dest)]

        # Call method to test, with the builtin open() replaced by a mock
        with patch('__builtin__.open', mock_open(), create=True):
            self.broker.download_files(None, downloads)

        # Check results
        for s3_object in s3_objects:
            self.assertTrue(s3_object.download_file.called)
Example #5
0
    def test_inactive_workspace(self):
        """Tests that ScaleFileManager.download_files() raises ArchivedWorkspace for an inactive workspace"""

        active_workspace = storage_test_utils.create_workspace()
        active_files = [storage_test_utils.create_file(workspace=active_workspace) for _ in range(3)]
        active_paths = [
            '/my/local/path/file.txt',
            '/another/local/path/file.txt',
            '/another/local/path/file.json',
        ]
        active_workspace.download_files = MagicMock()

        # The second workspace is saved as inactive before any file is created in it
        inactive_workspace = storage_test_utils.create_workspace()
        inactive_workspace.is_active = False
        inactive_workspace.save()
        inactive_files = [storage_test_utils.create_file(workspace=inactive_workspace) for _ in range(2)]
        inactive_paths = [
            '/my/local/path/4/file.txt',
            '/another/local/path/5/file.txt',
        ]

        pairs = list(zip(active_files + inactive_files, active_paths + inactive_paths))
        requested = [FileDownload(scale_file, path) for scale_file, path in pairs]

        with self.assertRaises(ArchivedWorkspace):
            ScaleFile.objects.download_files(requested)
Example #6
0
    def test_host_link_files(self, mock_execute, mock_client_class):
        """Tests that files requested with the third FileDownload argument set to True are sym-linked"""

        volume_path = os.path.join('the', 'volume', 'path')
        txt_name, json_name = 'my_file.txt', 'my_file.json'

        # Local destinations handed to the broker
        txt_dest = os.path.join('my_dir_1', txt_name)
        json_dest = os.path.join('my_dir_2', json_name)

        # Workspace-relative paths stored on the file models, and the same paths joined onto the volume path
        txt_wksp_path = os.path.join('my_wrk_dir_1', txt_name)
        json_wksp_path = os.path.join('my_wrk_dir_2', json_name)
        txt_source = os.path.join(volume_path, txt_wksp_path)
        json_source = os.path.join(volume_path, json_wksp_path)

        txt_file = storage_test_utils.create_file(file_path=txt_wksp_path)
        json_file = storage_test_utils.create_file(file_path=json_wksp_path)
        downloads = [
            FileDownload(txt_file, txt_dest, True),
            FileDownload(json_file, json_dest, True),
        ]

        # Call method to test
        self.broker.download_files(volume_path, downloads)

        # Check results: one 'ln -s' per file, in request order
        mock_execute.assert_has_calls([
            call(['ln', '-s', txt_source, txt_dest]),
            call(['ln', '-s', json_source, json_dest]),
        ])
Example #7
0
    def _retrieve_files(self, data_files):
        """Downloads the given data files into their requested local directories. A file ID that matches no stored
        file is skipped: nothing is downloaded for it and it is absent from the returned dict.

        :param data_files: Dict with each file ID mapping to an absolute directory path for downloading
        :type data_files: dict of long -> string
        :returns: Dict with each file ID mapping to its absolute local path
        :rtype: dict of long -> string

        :raises ArchivedWorkspace: If any of the files has an archived workspace (no longer active)
        :raises DeletedFile: If any of the files has been deleted
        """

        downloads = []
        path_by_file_id = {}
        taken_paths = set()  # Every local path handed out so far, used to detect file name collisions
        prefix_num = 0  # Shared by all files and never reset, so each generated prefix is used at most once

        for scale_file in ScaleFile.objects.filter(id__in=data_files.keys()):
            target_dir = data_files[scale_file.id]
            candidate = os.path.join(target_dir, scale_file.file_name)
            while candidate in taken_paths:
                # Another file already claimed this path, so prepend the next number to the file name
                prefix_num += 1
                candidate = os.path.join(target_dir, '%i_%s' % (prefix_num, scale_file.file_name))
            taken_paths.add(candidate)
            downloads.append(FileDownload(scale_file, candidate))
            path_by_file_id[scale_file.id] = candidate

        ScaleFile.objects.download_files(downloads)

        return path_by_file_id
Example #8
0
    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the file download process. It reads the 'file_id' and 'local_path' options, refuses to
        overwrite an existing local file, looks up the stored file and downloads it to the local path. Every failure
        is logged and ends the process with exit code 1.
        """

        logger.info('Command starting: scale_download_file')

        file_id = options.get('file_id')
        local_path = options.get('local_path')

        # Validate the file paths
        if os.path.exists(local_path):
            # No exception is being handled here, so log a plain error instead of an empty traceback
            logger.error('Local file already exists: %s', local_path)
            sys.exit(1)

        # Attempt to fetch the file model
        try:
            scale_file = ScaleFile.objects.get(pk=file_id)
        except ScaleFile.DoesNotExist:
            logger.exception('Stored file does not exist: %s', file_id)
            sys.exit(1)

        try:
            ScaleFile.objects.download_files([FileDownload(scale_file, local_path)])
        except Exception:
            # Deliberately not a bare except: SystemExit and KeyboardInterrupt must propagate untouched rather
            # than be reported as an unknown download error
            logger.exception('Unknown error occurred, exit code 1 returning')
            sys.exit(1)
        logger.info('Command completed: scale_download_file')
Example #9
0
def move_files(file_ids, new_workspace=None, new_file_path=None):
    """Moves the given files to a different workspace/uri

    If a new workspace is given, the files are uploaded to it and afterwards deleted from the old workspace. If only
    a new file path is given, the files are moved within their current workspace. If neither is given, no file is
    touched. A move file message is created for every processed file and all messages are sent together. Any failure
    is logged and terminates the process through sys.exit() with the matching error's exit code, or the general
    failure exit code when no specific error is registered for the exception.

    :param file_ids: List of ids of ScaleFile objects to move; should all be from the same workspace
    :type file_ids: [int]
    :param new_workspace: New workspace to move files to
    :type new_workspace: `storage.models.Workspace`
    :param new_file_path: New path for files; the same value is applied to every file
    :type new_file_path: string
    """

    try:
        messages = []
        files = ScaleFile.objects.all()
        files = files.select_related('workspace')
        files = files.defer('workspace__json_config')
        files = files.filter(id__in=file_ids).only('id', 'file_name', 'file_path', 'workspace')
        # Run the query once so that every step below works on the same model instances
        files = list(files)
        old_files = []
        # The old workspace is taken from the first file; an empty result raises IndexError, which is handled
        # by the generic error path below
        old_workspace = files[0].workspace
        if new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary

            # Pass the files themselves (not a list wrapping the whole collection) so that one path per file
            # comes back, which is what the zip() below relies on
            paths = old_workspace.get_file_system_paths(files)
            local_paths = []
            if paths:
                local_paths = paths
            else:
                file_downloads = []
                for scale_file in files:
                    local_path = os.path.join('/tmp', scale_file.file_name)
                    file_downloads.append(FileDownload(scale_file, local_path, False))
                    local_paths.append(local_path)
                ScaleFile.objects.download_files(file_downloads)

            uploads = []
            for scale_file, path in zip(files, local_paths):
                old_path = scale_file.file_path
                # Remember name and path as they were before the move so the old copy can be deleted afterwards
                old_files.append(ScaleFile(file_name=scale_file.file_name, file_path=scale_file.file_path))
                scale_file.file_path = new_file_path if new_file_path else scale_file.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s',
                            old_path, scale_file.workspace.name, scale_file.file_path,
                            new_workspace.name)
                uploads.append(FileUpload(scale_file, path))
                messages.append(create_move_file_message(file_id=scale_file.id))

            ScaleFile.objects.upload_files(new_workspace, uploads)
        elif new_file_path:
            moves = []
            for scale_file in files:
                logger.info('Moving %s to %s in workspace %s', scale_file.file_path,
                            new_file_path, scale_file.workspace.name)
                moves.append(FileMove(scale_file, new_file_path))
                messages.append(create_move_file_message(file_id=scale_file.id))

            ScaleFile.objects.move_files(moves)
        else:
            logger.info('No new workspace or file path. Doing nothing')

        CommandMessageManager().send_messages(messages)

        if new_workspace:
            # Copied files to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            old_workspace.delete_files(old_files, update_model=False)

    except ScaleError as err:
        err.log()
        sys.exit(err.exit_code)
    except Exception as ex:
        exit_code = GENERAL_FAIL_EXIT_CODE
        # Look for a registered error matching the exception's class name
        err = get_error_by_exception(ex.__class__.__name__)
        if err:
            err.log()
            exit_code = err.exit_code
        else:
            logger.exception('Error performing move_files steps')
        sys.exit(exit_code)
Example #10
0
def perform_ingest(ingest_id):
    """Performs the ingest for the given ingest ID

    Nothing is done when the ingest is already marked INGESTED or DUPLICATE, or when it is not in the INGESTING
    status after being started. Otherwise the source file is copied to a new workspace, moved to a new path or
    registered in place, and the ingest is completed as INGESTED. If any step raises, the ingest is completed as
    ERRORED and the exception is re-raised.

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: int
    """

    ingest = _get_ingest(ingest_id)
    file_name = ingest.file_name

    if ingest.status in ('INGESTED', 'DUPLICATE'):
        logger.warning('%s already marked %s, nothing to do', file_name, ingest.status)
        return

    _start_ingest(ingest)
    if ingest.status != 'INGESTING':
        return

    try:
        source_file = ingest.source_file
        if source_file.is_deleted:
            # The source file is still flagged as deleted, meaning it has yet to be copied/moved/registered
            source_file.set_basic_fields(file_name, ingest.file_size, ingest.media_type,
                                         ingest.get_data_type_tags())
            # Give the file a stable identifier derived from its name
            source_file.update_uuid(file_name)
            source_file.workspace = ingest.workspace
            source_file.file_path = ingest.file_path
            source_file.is_deleted = False
            source_file.is_parsed = False
            source_file.deleted = None
            source_file.parsed = None

            if ingest.new_workspace:
                # Copying requires a local path. Ask the broker for a direct file system path first and fall back
                # to downloading the file into /tmp when the broker cannot provide one
                # TODO: a future refactor should make the brokers work off of file objects instead of paths so the
                # extra download is not necessary
                direct_paths = ingest.workspace.get_file_system_paths([source_file])
                if direct_paths:
                    copy_source = direct_paths[0]
                else:
                    copy_source = os.path.join('/tmp', file_name)
                    ScaleFile.objects.download_files([FileDownload(source_file, copy_source, False)])
                source_file.file_path = ingest.new_file_path or ingest.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s', ingest.file_path,
                            ingest.workspace.name, source_file.file_path, ingest.new_workspace.name)
                ScaleFile.objects.upload_files(ingest.new_workspace, [FileUpload(source_file, copy_source)])
            elif ingest.new_file_path:
                logger.info('Moving %s to %s in workspace %s', ingest.file_path, ingest.new_file_path,
                            ingest.workspace.name)
                ScaleFile.objects.move_files([FileMove(source_file, ingest.new_file_path)])
            else:
                logger.info('Registering %s in workspace %s', ingest.file_path, ingest.workspace.name)
                _save_source_file(source_file)

        if ingest.new_workspace:
            # The file now lives in the new workspace, so remove the copy in the old workspace (only possible
            # when the old workspace exposes a local path for it)
            stale_file = SourceFile.create()
            stale_file.file_name = file_name
            stale_file.file_path = ingest.file_path
            stale_paths = ingest.workspace.get_file_system_paths([stale_file])
            if stale_paths:
                _delete_file(stale_paths[0])
    except Exception:
        _complete_ingest(ingest, 'ERRORED')
        raise
    else:
        _complete_ingest(ingest, 'INGESTED')
        logger.info('Ingest successful for %s', file_name)