def test_successfully(self, mock_execute, mock_exists):
    """Tests calling HostBroker.download_files() successfully"""

    mock_exists.return_value = True
    volume_path = os.path.join('the', 'volume', 'path')

    # (local dir, workspace dir, file name) for each file to download
    specs = [('my_dir_1', 'my_wrk_dir_1', 'my_file.txt'),
             ('my_dir_2', 'my_wrk_dir_2', 'my_file.json')]
    local_paths = [os.path.join(local_dir, name) for local_dir, _, name in specs]
    wksp_paths = [os.path.join(wksp_dir, name) for _, wksp_dir, name in specs]
    full_paths = [os.path.join(volume_path, wksp_path) for wksp_path in wksp_paths]
    downloads = [FileDownload(storage_test_utils.create_file(file_path=wksp_path), local_path, False)
                 for wksp_path, local_path in zip(wksp_paths, local_paths)]

    # Call method to test
    self.broker.download_files(volume_path, downloads)

    # Check results: one symlink command per file, in order
    expected_calls = [call(['ln', '-s', full_path, local_path])
                      for full_path, local_path in zip(full_paths, local_paths)]
    mock_execute.assert_has_calls(expected_calls)
def test_success(self):
    """Tests calling ScaleFileManager.download_files() successfully"""

    # First workspace with three files (creation order preserved)
    wksp_a = storage_test_utils.create_workspace()
    f1 = storage_test_utils.create_file(workspace=wksp_a)
    path_1 = '/my/local/path/file.txt'
    f2 = storage_test_utils.create_file(workspace=wksp_a)
    path_2 = '/another/local/path/file.txt'
    f3 = storage_test_utils.create_file(workspace=wksp_a)
    path_3 = '/another/local/path/file.json'
    wksp_a.setup_download_dir = MagicMock()
    wksp_a.download_files = MagicMock()

    # Second workspace with two files
    wksp_b = storage_test_utils.create_workspace()
    f4 = storage_test_utils.create_file(workspace=wksp_b)
    path_4 = '/my/local/path/4/file.txt'
    f5 = storage_test_utils.create_file(workspace=wksp_b)
    path_5 = '/another/local/path/5/file.txt'
    wksp_b.setup_download_dir = MagicMock()
    wksp_b.download_files = MagicMock()

    pairs = [(f1, path_1), (f2, path_2), (f3, path_3), (f4, path_4), (f5, path_5)]
    downloads = [FileDownload(f, p, False) for f, p in pairs]

    ScaleFile.objects.download_files(downloads)

    # Downloads must be grouped per workspace, preserving order
    wksp_a.download_files.assert_called_once_with([FileDownload(f1, path_1, False),
                                                   FileDownload(f2, path_2, False),
                                                   FileDownload(f3, path_3, False)])
    wksp_b.download_files.assert_called_once_with([FileDownload(f4, path_4, False),
                                                   FileDownload(f5, path_5, False)])
def test_deleted_file(self):
    """Tests calling ScaleFileManager.download_files() with a deleted file"""

    wksp_a = storage_test_utils.create_workspace()
    f1 = storage_test_utils.create_file(workspace=wksp_a)
    path_1 = '/my/local/path/file.txt'
    f2 = storage_test_utils.create_file(workspace=wksp_a)
    path_2 = '/another/local/path/file.txt'
    # Mark the second file deleted so the manager should refuse to download it
    f2.is_deleted = True
    f2.save()
    f3 = storage_test_utils.create_file(workspace=wksp_a)
    path_3 = '/another/local/path/file.json'
    wksp_a.download_files = MagicMock()

    wksp_b = storage_test_utils.create_workspace()
    f4 = storage_test_utils.create_file(workspace=wksp_b)
    path_4 = '/my/local/path/4/file.txt'
    f5 = storage_test_utils.create_file(workspace=wksp_b)
    path_5 = '/another/local/path/5/file.txt'
    wksp_b.download_files = MagicMock()

    pairs = [(f1, path_1), (f2, path_2), (f3, path_3), (f4, path_4), (f5, path_5)]
    downloads = [FileDownload(f, p, False) for f, p in pairs]

    self.assertRaises(DeletedFile, ScaleFile.objects.download_files, downloads)
def test_download_files(self, mock_client_class):
    """Tests downloading files successfully"""

    # Fake S3 objects returned by the client, one per file
    s3_object_1 = MagicMock()
    s3_object_2 = MagicMock()
    mock_client = MagicMock(S3Client)
    mock_client.get_object.side_effect = [s3_object_1, s3_object_2]
    mock_client_class.return_value.__enter__ = Mock(return_value=mock_client)

    # (local dir, workspace dir, file name) for each file to download
    specs = [('my_dir_1', 'my_wrk_dir_1', 'my_file.txt'),
             ('my_dir_2', 'my_wrk_dir_2', 'my_file.json')]
    downloads = []
    for local_dir, wksp_dir, name in specs:
        scale_file = storage_test_utils.create_file(file_path=os.path.join(wksp_dir, name))
        downloads.append(FileDownload(scale_file, os.path.join(local_dir, name)))

    # Call method to test, patching out real local file creation
    mo = mock_open()
    with patch('__builtin__.open', mo, create=True):
        self.broker.download_files(None, downloads)

    # Check results: each S3 object must have been asked to download
    self.assertTrue(s3_object_1.download_file.called)
    self.assertTrue(s3_object_2.download_file.called)
def test_inactive_workspace(self):
    """Tests calling ScaleFileManager.download_files() with an inactive workspace"""

    wksp_a = storage_test_utils.create_workspace()
    f1 = storage_test_utils.create_file(workspace=wksp_a)
    path_1 = '/my/local/path/file.txt'
    f2 = storage_test_utils.create_file(workspace=wksp_a)
    path_2 = '/another/local/path/file.txt'
    f3 = storage_test_utils.create_file(workspace=wksp_a)
    path_3 = '/another/local/path/file.json'
    wksp_a.download_files = MagicMock()

    # Second workspace is archived (inactive) before its files are created
    wksp_b = storage_test_utils.create_workspace()
    wksp_b.is_active = False
    wksp_b.save()
    f4 = storage_test_utils.create_file(workspace=wksp_b)
    path_4 = '/my/local/path/4/file.txt'
    f5 = storage_test_utils.create_file(workspace=wksp_b)
    path_5 = '/another/local/path/5/file.txt'

    pairs = [(f1, path_1), (f2, path_2), (f3, path_3), (f4, path_4), (f5, path_5)]
    downloads = [FileDownload(f, p) for f, p in pairs]

    self.assertRaises(ArchivedWorkspace, ScaleFile.objects.download_files, downloads)
def test_host_link_files(self, mock_execute, mock_client_class):
    """Tests sym-linking files successfully"""

    volume_path = os.path.join('the', 'volume', 'path')

    # (local dir, workspace dir, file name) for each file to link
    specs = [('my_dir_1', 'my_wrk_dir_1', 'my_file.txt'),
             ('my_dir_2', 'my_wrk_dir_2', 'my_file.json')]
    local_paths = [os.path.join(local_dir, name) for local_dir, _, name in specs]
    wksp_paths = [os.path.join(wksp_dir, name) for _, wksp_dir, name in specs]
    full_paths = [os.path.join(volume_path, wksp_path) for wksp_path in wksp_paths]
    downloads = [FileDownload(storage_test_utils.create_file(file_path=wksp_path), local_path, True)
                 for wksp_path, local_path in zip(wksp_paths, local_paths)]

    # Call method to test
    self.broker.download_files(volume_path, downloads)

    # Check results: one symlink command per file, in order
    expected_calls = [call(['ln', '-s', full_path, local_path])
                      for full_path, local_path in zip(full_paths, local_paths)]
    mock_execute.assert_has_calls(expected_calls)
def _retrieve_files(self, data_files):
    """Retrieves the given data files and writes them to the given local directories.

    If no file with a given ID exists, it will not be retrieved and returned in the results.

    :param data_files: Dict with each file ID mapping to an absolute directory path for downloading
    :type data_files: dict of long -> string
    :returns: Dict with each file ID mapping to its absolute local path
    :rtype: dict of long -> string
    :raises ArchivedWorkspace: If any of the files has an archived workspace (no longer active)
    :raises DeletedFile: If any of the files has been deleted
    """

    found_files = ScaleFile.objects.filter(id__in=data_files.keys())

    downloads = []
    id_to_local_path = {}
    claimed_paths = set()  # local paths already assigned, used to detect name collisions
    rename_count = 0  # shared across all files so every renamed file gets a distinct prefix
    for scale_file in found_files:
        target_dir = data_files[scale_file.id]
        candidate = os.path.join(target_dir, scale_file.file_name)
        # Path collision: prefix the file name with an increasing counter until unique
        while candidate in claimed_paths:
            rename_count += 1
            candidate = os.path.join(target_dir, '%i_%s' % (rename_count, scale_file.file_name))
        claimed_paths.add(candidate)
        downloads.append(FileDownload(scale_file, candidate))
        id_to_local_path[scale_file.id] = candidate

    ScaleFile.objects.download_files(downloads)
    return id_to_local_path
def handle(self, *args, **options):
    """See :meth:`django.core.management.base.BaseCommand.handle`.

    This method starts the file download process.
    """

    logger.info('Command starting: scale_download_file')

    file_id = options.get('file_id')
    local_path = options.get('local_path')

    # Validate the file paths
    if os.path.exists(local_path):
        # Use logger.error here: there is no active exception, so logger.exception
        # would log a misleading "NoneType: None" traceback
        logger.error('Local file already exists: %s', local_path)
        sys.exit(1)

    # Attempt to fetch the file model
    try:
        scale_file = ScaleFile.objects.get(pk=file_id)
    except ScaleFile.DoesNotExist:
        logger.exception('Stored file does not exist: %s', file_id)
        sys.exit(1)

    try:
        ScaleFile.objects.download_files([FileDownload(scale_file, local_path)])
    except Exception:
        # except Exception (not a bare except) so SystemExit/KeyboardInterrupt still propagate
        logger.exception('Unknown error occurred, exit code 1 returning')
        sys.exit(1)

    logger.info('Command completed: scale_download_file')
def move_files(file_ids, new_workspace=None, new_file_path=None):
    """Moves the given files to a different workspace/uri

    :param file_ids: List of ids of ScaleFile objects to move; should all be from the same workspace
    :type file_ids: [int]
    :param new_workspace: New workspace to move files to
    :type new_workspace: `storage.models.Workspace`
    :param new_file_path: New path for files
    :type new_file_path: string
    """

    try:
        messages = []
        files = ScaleFile.objects.all()
        files = files.select_related('workspace')
        files = files.defer('workspace__json_config')
        files = files.filter(id__in=file_ids).only('id', 'file_name', 'file_path', 'workspace')
        old_files = []
        # Assumes all files share one workspace (see docstring); an empty file_ids list
        # raises IndexError here, which the broad handler below converts to an exit
        old_workspace = files[0].workspace
        if new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary
            # Bug fix: pass the file models themselves; the original passed [files], a
            # one-element list wrapping the queryset, instead of the list of files
            paths = old_workspace.get_file_system_paths(list(files))
            local_paths = []
            if paths:
                local_paths = paths
            else:
                # Broker gave no direct paths: download each file to /tmp first
                file_downloads = []
                for scale_file in files:
                    local_path = os.path.join('/tmp', scale_file.file_name)
                    file_downloads.append(FileDownload(scale_file, local_path, False))
                    local_paths.append(local_path)
                ScaleFile.objects.download_files(file_downloads)
            uploads = []
            for scale_file, path in zip(files, local_paths):
                old_path = scale_file.file_path
                # Remember the old name/path so the old copy can be deleted afterwards
                old_files.append(ScaleFile(file_name=scale_file.file_name, file_path=scale_file.file_path))
                scale_file.file_path = new_file_path if new_file_path else scale_file.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s', old_path,
                            scale_file.workspace.name, scale_file.file_path, new_workspace.name)
                file_upload = FileUpload(scale_file, path)
                uploads.append(file_upload)
                message = create_move_file_message(file_id=scale_file.id)
                messages.append(message)
            ScaleFile.objects.upload_files(new_workspace, uploads)
        elif new_file_path:
            # Same workspace, new path: a broker-level move is sufficient
            moves = []
            for scale_file in files:
                logger.info('Moving %s to %s in workspace %s', scale_file.file_path,
                            new_file_path, scale_file.workspace.name)
                moves.append(FileMove(scale_file, new_file_path))
                message = create_move_file_message(file_id=scale_file.id)
                messages.append(message)
            ScaleFile.objects.move_files(moves)
        else:
            logger.info('No new workspace or file path. Doing nothing')

        CommandMessageManager().send_messages(messages)

        if new_workspace:
            # Copied files to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            old_workspace.delete_files(old_files, update_model=False)
    except ScaleError as err:
        err.log()
        sys.exit(err.exit_code)
    except Exception as ex:
        # Map known exception types to Scale error codes; otherwise use the general failure code
        exit_code = GENERAL_FAIL_EXIT_CODE
        err = get_error_by_exception(ex.__class__.__name__)
        if err:
            err.log()
            exit_code = err.exit_code
        else:
            logger.exception('Error performing move_files steps')
        sys.exit(exit_code)
def perform_ingest(ingest_id):
    """Performs the ingest for the given ingest ID

    Copies or moves the source file to its final workspace/path as configured on the
    ingest model, registers the source file, and marks the ingest INGESTED (or ERRORED
    on any failure).

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: int
    """

    ingest = _get_ingest(ingest_id)
    file_name = ingest.file_name

    # Idempotency guard: nothing to do if this ingest already finished
    if ingest.status in ['INGESTED', 'DUPLICATE']:
        logger.warning('%s already marked %s, nothing to do', file_name, ingest.status)
        return

    _start_ingest(ingest)
    # If _start_ingest did not leave the ingest in INGESTING, there is nothing more to do here
    if ingest.status != 'INGESTING':
        return

    try:
        source_file = ingest.source_file
        if source_file.is_deleted:
            # Source file still marked as deleted, so we must copy/move/register the file
            source_file.set_basic_fields(file_name, ingest.file_size, ingest.media_type,
                                         ingest.get_data_type_tags())
            source_file.update_uuid(file_name)  # Add a stable identifier based on the file name
            source_file.workspace = ingest.workspace
            source_file.file_path = ingest.file_path
            source_file.is_deleted = False
            source_file.is_parsed = False
            source_file.deleted = None
            source_file.parsed = None

        if ingest.new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary
            paths = ingest.workspace.get_file_system_paths([source_file])
            if paths:
                local_path = paths[0]
            else:
                local_path = os.path.join('/tmp', file_name)
                file_download = FileDownload(source_file, local_path, False)
                ScaleFile.objects.download_files([file_download])
            # If no new path is configured, keep the original file path in the new workspace
            source_file.file_path = ingest.new_file_path if ingest.new_file_path else ingest.file_path
            logger.info('Copying %s in workspace %s to %s in workspace %s', ingest.file_path,
                        ingest.workspace.name, source_file.file_path, ingest.new_workspace.name)
            file_upload = FileUpload(source_file, local_path)
            ScaleFile.objects.upload_files(ingest.new_workspace, [file_upload])
        elif ingest.new_file_path:
            # Same workspace, new path: a broker-level move is sufficient
            logger.info('Moving %s to %s in workspace %s', ingest.file_path, ingest.new_file_path,
                        ingest.workspace.name)
            file_move = FileMove(source_file, ingest.new_file_path)
            ScaleFile.objects.move_files([file_move])
        else:
            # No new workspace or path: just register the file where it already is
            logger.info('Registering %s in workspace %s', ingest.file_path, ingest.workspace.name)

        _save_source_file(source_file)

        if ingest.new_workspace:
            # Copied file to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            file_with_old_path = SourceFile.create()
            file_with_old_path.file_name = file_name
            file_with_old_path.file_path = ingest.file_path
            paths = ingest.workspace.get_file_system_paths([file_with_old_path])
            if paths:
                _delete_file(paths[0])
    except Exception:
        # Any failure marks the ingest ERRORED before re-raising so its state is not left dangling
        _complete_ingest(ingest, 'ERRORED')
        raise

    _complete_ingest(ingest, 'INGESTED')
    logger.info('Ingest successful for %s', file_name)