def test_inactive_workspace(self):
    """Tests that ScaleFileManager.move_files() raises ArchivedWorkspace when a file is in an inactive workspace"""

    # Active workspace with two files
    active_workspace = storage_test_utils.create_workspace()
    first_file = storage_test_utils.create_file(file_name='my_file_1.txt', workspace=active_workspace)
    first_dest = os.path.join('my', 'new', 'path', '1', os.path.basename(first_file.file_path))
    second_file = storage_test_utils.create_file(file_name='my_file_2.txt', workspace=active_workspace)
    second_dest = os.path.join('my', 'new', 'path', '2', os.path.basename(second_file.file_path))
    active_workspace.move_files = MagicMock()

    # Inactive (archived) workspace with two more files
    inactive_workspace = storage_test_utils.create_workspace()
    inactive_workspace.is_active = False
    inactive_workspace.save()
    third_file = storage_test_utils.create_file(file_name='my_file_3.txt', workspace=inactive_workspace)
    third_dest = os.path.join('my', 'new', 'path', '3', os.path.basename(third_file.file_path))
    fourth_file = storage_test_utils.create_file(file_name='my_file_4.txt', workspace=inactive_workspace)
    fourth_dest = os.path.join('my', 'new', 'path', '4', os.path.basename(fourth_file.file_path))
    inactive_workspace.move_files = MagicMock()

    moves = [FileMove(first_file, first_dest), FileMove(second_file, second_dest),
             FileMove(third_file, third_dest), FileMove(fourth_file, fourth_dest)]

    # Moving any file that lives in an archived workspace must be rejected
    self.assertRaises(ArchivedWorkspace, ScaleFile.objects.move_files, moves)
def test_move_files(self, mock_client_class):
    """Tests moving files successfully"""

    # One mock S3 object per get_object() call, in call order: (old, new) for each file
    src_obj_1 = MagicMock()
    dest_obj_1 = MagicMock()
    src_obj_2 = MagicMock()
    dest_obj_2 = MagicMock()
    mock_client = MagicMock(S3Client)
    mock_client.get_object.side_effect = [src_obj_1, dest_obj_1, src_obj_2, dest_obj_2]
    mock_client_class.return_value.__enter__ = Mock(return_value=mock_client)

    name_txt = 'my_file.txt'
    name_json = 'my_file.json'
    src_path_1 = os.path.join('my_dir_1', name_txt)
    src_path_2 = os.path.join('my_dir_2', name_json)
    dest_path_1 = os.path.join('my_new_dir_1', name_txt)
    dest_path_2 = os.path.join('my_new_dir_2', name_json)
    moved_file_1 = storage_test_utils.create_file(file_path=src_path_1)
    moved_file_2 = storage_test_utils.create_file(file_path=src_path_2)

    # Call method to test
    self.broker.move_files(None, [FileMove(moved_file_1, dest_path_1), FileMove(moved_file_2, dest_path_2)])

    # Each move should copy to the new key and delete the old one
    self.assertTrue(dest_obj_1.copy_from.called)
    self.assertTrue(src_obj_1.delete.called)
    self.assertTrue(dest_obj_2.copy_from.called)
    self.assertTrue(src_obj_2.delete.called)
    # Models should now carry the new workspace paths
    self.assertEqual(moved_file_1.file_path, dest_path_1)
    self.assertEqual(moved_file_2.file_path, dest_path_2)
def test_move_files(self, mock_conn_class):
    """Tests moving files successfully"""

    # One mock S3 key per get_key() call, one per file being moved
    key_1 = MagicMock(Key)
    key_2 = MagicMock(Key)
    mock_conn = MagicMock(BrokerConnection)
    mock_conn.get_key.side_effect = [key_1, key_2]
    mock_conn_class.return_value.__enter__ = Mock(return_value=mock_conn)

    name_txt = 'my_file.txt'
    name_json = 'my_file.json'
    src_path_1 = os.path.join('my_dir_1', name_txt)
    src_path_2 = os.path.join('my_dir_2', name_json)
    dest_path_1 = os.path.join('my_new_dir_1', name_txt)
    dest_path_2 = os.path.join('my_new_dir_2', name_json)
    moved_file_1 = storage_test_utils.create_file(file_path=src_path_1)
    moved_file_2 = storage_test_utils.create_file(file_path=src_path_2)

    # Call method to test
    self.broker.move_files(None, [FileMove(moved_file_1, dest_path_1), FileMove(moved_file_2, dest_path_2)])

    # Each key should have been copied to its new location
    self.assertTrue(key_1.copy.called)
    self.assertTrue(key_2.copy.called)
    # Models should now carry the new workspace paths
    self.assertEqual(moved_file_1.file_path, dest_path_1)
    self.assertEqual(moved_file_2.file_path, dest_path_2)
def test_successfully(self, mock_move, mock_chmod, mock_exists, mock_makedirs): """Tests calling HostBroker.move_files() successfully""" def new_exists(path): return path.count('new') == 0 mock_exists.side_effect = new_exists volume_path = os.path.join('the', 'volume', 'path') file_name_1 = 'my_file.txt' file_name_2 = 'my_file.json' old_workspace_path_1 = os.path.join('my_dir_1', file_name_1) old_workspace_path_2 = os.path.join('my_dir_2', file_name_2) new_workspace_path_1 = os.path.join('my_new_dir_1', file_name_1) new_workspace_path_2 = os.path.join('my_new_dir_2', file_name_2) full_old_workspace_path_1 = os.path.join(volume_path, old_workspace_path_1) full_old_workspace_path_2 = os.path.join(volume_path, old_workspace_path_2) full_new_workspace_path_1 = os.path.join(volume_path, new_workspace_path_1) full_new_workspace_path_2 = os.path.join(volume_path, new_workspace_path_2) file_1 = storage_test_utils.create_file(file_path=old_workspace_path_1) file_2 = storage_test_utils.create_file(file_path=old_workspace_path_2) file_1_mv = FileMove(file_1, new_workspace_path_1) file_2_mv = FileMove(file_2, new_workspace_path_2) # Call method to test self.broker.move_files(volume_path, [file_1_mv, file_2_mv]) # Check results two_calls = [ call(os.path.dirname(full_new_workspace_path_1), mode=0755), call(os.path.dirname(full_new_workspace_path_2), mode=0755) ] mock_makedirs.assert_has_calls(two_calls) two_calls = [ call(full_old_workspace_path_1, full_new_workspace_path_1), call(full_old_workspace_path_2, full_new_workspace_path_2) ] mock_move.assert_has_calls(two_calls) two_calls = [ call(full_new_workspace_path_1, 0644), call(full_new_workspace_path_2, 0644) ] mock_chmod.assert_has_calls(two_calls) self.assertEqual(file_1.file_path, new_workspace_path_1) self.assertEqual(file_2.file_path, new_workspace_path_2)
def test_success(self):
    """Tests calling ScaleFileManager.move_files() successfully"""

    # Two files in the first workspace
    workspace_a = storage_test_utils.create_workspace()
    file_1 = storage_test_utils.create_file(file_name='my_file_1.txt', workspace=workspace_a)
    dest_1 = os.path.join('my', 'new', 'path', '1', os.path.basename(file_1.file_path))
    file_2 = storage_test_utils.create_file(file_name='my_file_2.txt', workspace=workspace_a)
    dest_2 = os.path.join('my', 'new', 'path', '2', os.path.basename(file_2.file_path))
    workspace_a.move_files = MagicMock()

    # Two files in the second workspace
    workspace_b = storage_test_utils.create_workspace()
    file_3 = storage_test_utils.create_file(file_name='my_file_3.txt', workspace=workspace_b)
    dest_3 = os.path.join('my', 'new', 'path', '3', os.path.basename(file_3.file_path))
    file_4 = storage_test_utils.create_file(file_name='my_file_4.txt', workspace=workspace_b)
    dest_4 = os.path.join('my', 'new', 'path', '4', os.path.basename(file_4.file_path))
    workspace_b.move_files = MagicMock()

    ScaleFile.objects.move_files([FileMove(file_1, dest_1), FileMove(file_2, dest_2),
                                  FileMove(file_3, dest_3), FileMove(file_4, dest_4)])

    # The moves must be grouped per workspace and each workspace called exactly once
    workspace_a.move_files.assert_called_once_with([FileMove(file_1, dest_1), FileMove(file_2, dest_2)])
    workspace_b.move_files.assert_called_once_with([FileMove(file_3, dest_3), FileMove(file_4, dest_4)])
def test_move_source_file(self, mock_move_files):
    """Tests calling save_parse_results so that the source file is moved to a different path in the workspace"""

    dest_path = os.path.join('the', 'new', 'workspace', 'path', self.src_file.file_name)

    # Call method to test
    SourceFile.objects.save_parse_results(self.src_file.id, None, None, None, [], dest_path)

    # The source file should be handed to move_files() exactly once with its new path
    mock_move_files.assert_called_once_with([FileMove(self.src_file, dest_path)])
def handle(self, file_id, remote_path, **options):
    """See :meth:`django.core.management.base.BaseCommand.handle`.

    This method starts the file move process.

    :param file_id: The ID of the stored file to move
    :type file_id: int
    :param remote_path: The new workspace path for the file
    :type remote_path: string
    """

    logger.info('Command starting: scale_move_file')

    # Attempt to fetch the file model
    try:
        scale_file = ScaleFile.objects.get(pk=file_id)
    except ScaleFile.DoesNotExist:
        logger.exception('Stored file does not exist: %s', file_id)
        sys.exit(1)

    try:
        ScaleFile.objects.move_files([FileMove(scale_file, remote_path)])
    except Exception:
        # Was a bare "except:", which also swallowed SystemExit/KeyboardInterrupt
        # and misreported them as an unknown error
        logger.exception('Unknown error occurred, exit code 1 returning')
        sys.exit(1)

    logger.info('Command completed: scale_move_file')
def move_files(file_ids, new_workspace=None, new_file_path=None):
    """Moves the given files to a different workspace/uri

    If a new workspace is given, the files are copied into it (then deleted from the old workspace where
    possible); otherwise, if only a new file path is given, the files are moved within their current workspace.
    A move-file command message is sent for each file processed. On error this function exits the process with
    an appropriate exit code.

    :param file_ids: List of ids of ScaleFile objects to move; should all be from the same workspace
    :type file_ids: [int]
    :param new_workspace: New workspace to move files to
    :type new_workspace: `storage.models.Workspace`
    :param new_file_path: New path for files
    :type new_file_path: string
    """

    try:
        messages = []
        files = ScaleFile.objects.all()
        files = files.select_related('workspace')
        files = files.defer('workspace__json_config')
        files = files.filter(id__in=file_ids).only('id', 'file_name', 'file_path', 'workspace')
        old_files = []
        # All files are expected to share a workspace, so the first file's workspace is "the" old workspace
        old_workspace = files[0].workspace
        if new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we
            # must download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the
            # extra download is not necessary
            # Bug fix: pass the sequence of files itself, not a list containing the whole queryset
            paths = old_workspace.get_file_system_paths(files)
            local_paths = []
            if paths:
                local_paths = paths
            else:
                file_downloads = []
                for scale_file in files:
                    local_path = os.path.join('/tmp', scale_file.file_name)
                    file_downloads.append(FileDownload(scale_file, local_path, False))
                    local_paths.append(local_path)
                ScaleFile.objects.download_files(file_downloads)

            uploads = []
            for scale_file, path in zip(files, local_paths):
                old_path = scale_file.file_path
                # Remember the old name/path so the original copy can be deleted after the upload
                old_files.append(ScaleFile(file_name=scale_file.file_name, file_path=scale_file.file_path))
                scale_file.file_path = new_file_path if new_file_path else scale_file.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s', old_path,
                            scale_file.workspace.name, scale_file.file_path, new_workspace.name)
                uploads.append(FileUpload(scale_file, path))
                messages.append(create_move_file_message(file_id=scale_file.id))
            ScaleFile.objects.upload_files(new_workspace, uploads)
        elif new_file_path:
            moves = []
            for scale_file in files:
                logger.info('Moving %s to %s in workspace %s', scale_file.file_path, new_file_path,
                            scale_file.workspace.name)
                moves.append(FileMove(scale_file, new_file_path))
                messages.append(create_move_file_message(file_id=scale_file.id))
            ScaleFile.objects.move_files(moves)
        else:
            logger.info('No new workspace or file path. Doing nothing')

        # No-op when neither branch above queued any messages
        CommandMessageManager().send_messages(messages)

        if new_workspace:
            # Copied files to new workspace, so delete file in old workspace (if workspace provides local path
            # to do so)
            old_workspace.delete_files(old_files, update_model=False)
    except ScaleError as err:
        err.log()
        sys.exit(err.exit_code)
    except Exception as ex:
        # Map known exception types onto Scale error codes; anything else exits with the general failure code
        exit_code = GENERAL_FAIL_EXIT_CODE
        err = get_error_by_exception(ex.__class__.__name__)
        if err:
            err.log()
            exit_code = err.exit_code
        else:
            logger.exception('Error performing move_files steps')
        sys.exit(exit_code)
def perform_ingest(ingest_id):
    """Performs the ingest for the given ingest ID

    Skips ingests already marked INGESTED/DUPLICATE, then copies (new workspace), moves (new file path) or
    simply registers the source file, and finally marks the ingest INGESTED. Any exception marks the ingest
    ERRORED and is re-raised.

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: int
    """

    ingest = _get_ingest(ingest_id)
    file_name = ingest.file_name
    # Already handled - nothing more to do
    if ingest.status in ['INGESTED', 'DUPLICATE']:
        logger.warning('%s already marked %s, nothing to do', file_name, ingest.status)
        return
    _start_ingest(ingest)
    # _start_ingest() did not transition this ingest to INGESTING, so bail out
    if ingest.status != 'INGESTING':
        return
    try:
        source_file = ingest.source_file
        if source_file.is_deleted:
            # Source file still marked as deleted, so we must copy/move/register the file
            source_file.set_basic_fields(file_name, ingest.file_size, ingest.media_type,
                                         ingest.get_data_type_tags())
            source_file.update_uuid(file_name)  # Add a stable identifier based on the file name
            source_file.workspace = ingest.workspace
            source_file.file_path = ingest.file_path
            # Resurrect the model: clear deleted/parsed state before re-registering
            source_file.is_deleted = False
            source_file.is_parsed = False
            source_file.deleted = None
            source_file.parsed = None
        if ingest.new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary
            paths = ingest.workspace.get_file_system_paths([source_file])
            if paths:
                local_path = paths[0]
            else:
                local_path = os.path.join('/tmp', file_name)
                file_download = FileDownload(source_file, local_path, False)
                ScaleFile.objects.download_files([file_download])
            source_file.file_path = ingest.new_file_path if ingest.new_file_path else ingest.file_path
            logger.info('Copying %s in workspace %s to %s in workspace %s', ingest.file_path,
                        ingest.workspace.name, source_file.file_path, ingest.new_workspace.name)
            file_upload = FileUpload(source_file, local_path)
            ScaleFile.objects.upload_files(ingest.new_workspace, [file_upload])
        elif ingest.new_file_path:
            # Same workspace, different path: a plain move, no copy needed
            logger.info('Moving %s to %s in workspace %s', ingest.file_path, ingest.new_file_path,
                        ingest.workspace.name)
            file_move = FileMove(source_file, ingest.new_file_path)
            ScaleFile.objects.move_files([file_move])
        else:
            # No new workspace or path: just register the file where it already is
            logger.info('Registering %s in workspace %s', ingest.file_path, ingest.workspace.name)
        _save_source_file(source_file)
        if ingest.new_workspace:
            # Copied file to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            # A throwaway model with the OLD path is used so the broker resolves the original location
            file_with_old_path = SourceFile.create()
            file_with_old_path.file_name = file_name
            file_with_old_path.file_path = ingest.file_path
            paths = ingest.workspace.get_file_system_paths([file_with_old_path])
            if paths:
                _delete_file(paths[0])
    except Exception:
        # Mark the ingest failed before propagating so its status is never left INGESTING
        _complete_ingest(ingest, 'ERRORED')
        raise
    _complete_ingest(ingest, 'INGESTED')
    logger.info('Ingest successful for %s', file_name)
def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path):
    """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    :param src_file_id: The ID of the source file
    :type src_file_id: int
    :param geo_json: The associated geojson data, possibly None
    :type geo_json: dict
    :param data_started: The start time of the data contained in the source file, possibly None
    :type data_started: :class:`datetime.datetime` or None
    :param data_ended: The end time of the data contained in the source file, possibly None
    :type data_ended: :class:`datetime.datetime` or None
    :param data_types: List of strings containing the data types tags for this source file.
    :type data_types: [string]
    :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If None,
        the source file should not be moved.
    :type new_workspace_path: str
    """

    geom = None
    props = None
    if geo_json:
        geom, props = geo_utils.parse_geo_json(geo_json)

    # Acquire model lock
    src_file = ScaleFile.objects.select_for_update().get(pk=src_file_id, file_type='SOURCE')
    src_file.is_parsed = True
    src_file.parsed = now()
    # When only one of the two data times is supplied, both bounds collapse onto the supplied value;
    # when neither is supplied, both are cleared
    if data_started and not data_ended:
        src_file.data_started = data_started
        src_file.data_ended = data_started
    elif not data_started and data_ended:
        src_file.data_started = data_ended
        src_file.data_ended = data_ended
    elif not data_ended and not data_started:
        src_file.data_started = None
        src_file.data_ended = None
    else:
        src_file.data_started = data_started
        src_file.data_ended = data_ended
    # Source times mirror the (normalized) data times
    src_file.source_started = src_file.data_started
    src_file.source_ended = src_file.data_ended
    for tag in data_types:
        src_file.add_data_type_tag(tag)
    if geom:
        src_file.geometry = geom
        src_file.center_point = geo_utils.get_center_point(geom)
    if props:
        src_file.meta_data = props
    # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
    src_file.set_countries()
    src_file.save()

    try:
        # Try to update corresponding ingest models with this file's data time
        from ingest.models import Ingest
        Ingest.objects.filter(source_file_id=src_file_id).update(data_started=data_started,
                                                                 data_ended=data_ended)
    except ImportError:
        # The ingest app may not be installed; data times on Ingest are best-effort
        pass

    # Move the source file if a new workspace path is provided and the workspace allows it
    # NOTE(review): old_workspace_path is captured but not used in this span - a rollback using it may
    # live in a caller or in code outside this view; confirm before removing
    old_workspace_path = src_file.file_path
    if new_workspace_path and src_file.workspace.is_move_enabled:
        ScaleFile.objects.move_files([FileMove(src_file, new_workspace_path)])
def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path):
    """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    After saving, parse trigger rules are evaluated; if that fails and the file was moved, the move is undone
    before the exception is re-raised.

    :param src_file_id: The ID of the source file
    :type src_file_id: int
    :param geo_json: The associated geojson data, possibly None
    :type geo_json: dict
    :param data_started: The start time of the data contained in the source file, possibly None
    :type data_started: :class:`datetime.datetime` or None
    :param data_ended: The end time of the data contained in the source file, possibly None
    :type data_ended: :class:`datetime.datetime` or None
    :param data_types: List of strings containing the data types tags for this source file.
    :type data_types: [string]
    :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If None,
        the source file should not be moved.
    :type new_workspace_path: str
    """

    geom = None
    props = None
    if geo_json:
        geom, props = geo_utils.parse_geo_json(geo_json)

    # Acquire model lock
    src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
    src_file.is_parsed = True
    src_file.parsed = now()
    src_file.data_started = data_started
    src_file.data_ended = data_ended
    # (removed dead code: a target_date fallback chain was computed here but never used)
    for tag in data_types:
        src_file.add_data_type_tag(tag)
    if geom:
        src_file.geometry = geom
        src_file.center_point = geo_utils.get_center_point(geom)
    # NOTE(review): assigned unconditionally, so a None geo_json clears any existing meta_data; another
    # version of this method guards this with "if props:" - confirm which behavior is intended
    src_file.meta_data = props
    # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
    src_file.set_countries()
    src_file.save()

    # Move the source file if a new workspace path is provided and the workspace allows it
    old_workspace_path = src_file.file_path
    if new_workspace_path and src_file.workspace.is_move_enabled:
        ScaleFile.objects.move_files([FileMove(src_file, new_workspace_path)])

    try:
        # Check trigger rules for parsed source files
        ParseTriggerHandler().process_parsed_source_file(src_file)
    except Exception:
        # Move file back if there was an error
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files([FileMove(src_file, old_workspace_path)])
        raise