Ejemplo n.º 1
0
    def test_inactive_workspace(self):
        """Tests that ScaleFileManager.move_files() raises ArchivedWorkspace when any
        file to move belongs to an inactive workspace.
        """

        # Active workspace with two files to move
        active_ws = storage_test_utils.create_workspace()
        moves = []
        for index in ('1', '2'):
            scale_file = storage_test_utils.create_file(
                file_name='my_file_%s.txt' % index, workspace=active_ws)
            target_path = os.path.join('my', 'new', 'path', index,
                                       os.path.basename(scale_file.file_path))
            moves.append(FileMove(scale_file, target_path))
        active_ws.move_files = MagicMock()

        # Second workspace is deactivated before the move is attempted
        inactive_ws = storage_test_utils.create_workspace()
        inactive_ws.is_active = False
        inactive_ws.save()
        for index in ('3', '4'):
            scale_file = storage_test_utils.create_file(
                file_name='my_file_%s.txt' % index, workspace=inactive_ws)
            target_path = os.path.join('my', 'new', 'path', index,
                                       os.path.basename(scale_file.file_path))
            moves.append(FileMove(scale_file, target_path))
        inactive_ws.move_files = MagicMock()

        # Any file residing in an archived workspace must abort the whole move
        self.assertRaises(ArchivedWorkspace, ScaleFile.objects.move_files,
                          moves)
Ejemplo n.º 2
0
    def test_move_files(self, mock_client_class):
        """Tests moving files successfully"""

        # The broker asks for source object (a) then destination object (b)
        # for each file, in order; side_effect returns them in that order
        src_obj_1 = MagicMock()
        dest_obj_1 = MagicMock()
        src_obj_2 = MagicMock()
        dest_obj_2 = MagicMock()
        mock_client = MagicMock(S3Client)
        mock_client.get_object.side_effect = [src_obj_1, dest_obj_1, src_obj_2, dest_obj_2]
        mock_client_class.return_value.__enter__ = Mock(return_value=mock_client)

        txt_name = 'my_file.txt'
        json_name = 'my_file.json'
        old_path_1 = os.path.join('my_dir_1', txt_name)
        old_path_2 = os.path.join('my_dir_2', json_name)
        new_path_1 = os.path.join('my_new_dir_1', txt_name)
        new_path_2 = os.path.join('my_new_dir_2', json_name)

        file_1 = storage_test_utils.create_file(file_path=old_path_1)
        file_2 = storage_test_utils.create_file(file_path=old_path_2)

        # Call method to test
        self.broker.move_files(None, [FileMove(file_1, new_path_1),
                                      FileMove(file_2, new_path_2)])

        # Each destination object is copied from its source, then the source deleted
        self.assertTrue(dest_obj_1.copy_from.called)
        self.assertTrue(src_obj_1.delete.called)
        self.assertTrue(dest_obj_2.copy_from.called)
        self.assertTrue(src_obj_2.delete.called)
        # The file models must reflect the new workspace paths
        self.assertEqual(file_1.file_path, new_path_1)
        self.assertEqual(file_2.file_path, new_path_2)
Ejemplo n.º 3
0
    def test_move_files(self, mock_conn_class):
        """Tests moving files successfully"""

        # One mocked S3 key per file, handed out in request order
        key_for_txt = MagicMock(Key)
        key_for_json = MagicMock(Key)
        mock_conn = MagicMock(BrokerConnection)
        mock_conn.get_key.side_effect = [key_for_txt, key_for_json]
        mock_conn_class.return_value.__enter__ = Mock(return_value=mock_conn)

        txt_name = 'my_file.txt'
        json_name = 'my_file.json'
        old_path_1 = os.path.join('my_dir_1', txt_name)
        old_path_2 = os.path.join('my_dir_2', json_name)
        new_path_1 = os.path.join('my_new_dir_1', txt_name)
        new_path_2 = os.path.join('my_new_dir_2', json_name)

        file_1 = storage_test_utils.create_file(file_path=old_path_1)
        file_2 = storage_test_utils.create_file(file_path=old_path_2)

        # Call method to test
        self.broker.move_files(None, [FileMove(file_1, new_path_1),
                                      FileMove(file_2, new_path_2)])

        # Each S3 key should have been copied to its new location
        self.assertTrue(key_for_txt.copy.called)
        self.assertTrue(key_for_json.copy.called)
        # The file models must reflect the new workspace paths
        self.assertEqual(file_1.file_path, new_path_1)
        self.assertEqual(file_2.file_path, new_path_2)
Ejemplo n.º 4
0
    def test_successfully(self, mock_move, mock_chmod, mock_exists,
                          mock_makedirs):
        """Tests calling HostBroker.move_files() successfully

        Verifies that the broker creates the missing destination directories,
        moves each file on the host volume, sets the expected permissions, and
        updates the file models' workspace paths.
        """
        def new_exists(path):
            # Pretend only the 'new' destination directories are missing so
            # the broker is forced to call makedirs for them
            return path.count('new') == 0

        mock_exists.side_effect = new_exists

        volume_path = os.path.join('the', 'volume', 'path')
        file_name_1 = 'my_file.txt'
        file_name_2 = 'my_file.json'
        old_workspace_path_1 = os.path.join('my_dir_1', file_name_1)
        old_workspace_path_2 = os.path.join('my_dir_2', file_name_2)
        new_workspace_path_1 = os.path.join('my_new_dir_1', file_name_1)
        new_workspace_path_2 = os.path.join('my_new_dir_2', file_name_2)
        full_old_workspace_path_1 = os.path.join(volume_path,
                                                 old_workspace_path_1)
        full_old_workspace_path_2 = os.path.join(volume_path,
                                                 old_workspace_path_2)
        full_new_workspace_path_1 = os.path.join(volume_path,
                                                 new_workspace_path_1)
        full_new_workspace_path_2 = os.path.join(volume_path,
                                                 new_workspace_path_2)

        file_1 = storage_test_utils.create_file(file_path=old_workspace_path_1)
        file_2 = storage_test_utils.create_file(file_path=old_workspace_path_2)
        file_1_mv = FileMove(file_1, new_workspace_path_1)
        file_2_mv = FileMove(file_2, new_workspace_path_2)

        # Call method to test
        self.broker.move_files(volume_path, [file_1_mv, file_2_mv])

        # Check results
        # NOTE: 0o755/0o644 replace the legacy 0755/0644 literals, which are
        # syntax errors under Python 3 (PEP 3127); values are unchanged
        two_calls = [
            call(os.path.dirname(full_new_workspace_path_1), mode=0o755),
            call(os.path.dirname(full_new_workspace_path_2), mode=0o755)
        ]
        mock_makedirs.assert_has_calls(two_calls)
        two_calls = [
            call(full_old_workspace_path_1, full_new_workspace_path_1),
            call(full_old_workspace_path_2, full_new_workspace_path_2)
        ]
        mock_move.assert_has_calls(two_calls)
        two_calls = [
            call(full_new_workspace_path_1, 0o644),
            call(full_new_workspace_path_2, 0o644)
        ]
        mock_chmod.assert_has_calls(two_calls)

        self.assertEqual(file_1.file_path, new_workspace_path_1)
        self.assertEqual(file_2.file_path, new_workspace_path_2)
Ejemplo n.º 5
0
    def test_success(self):
        """Tests calling ScaleFileManager.move_files() successfully"""

        workspace_a = storage_test_utils.create_workspace()
        workspace_b = storage_test_utils.create_workspace()

        # Files 1-2 live in the first workspace, files 3-4 in the second
        created = {}
        new_paths = {}
        for index, workspace in (('1', workspace_a), ('2', workspace_a),
                                 ('3', workspace_b), ('4', workspace_b)):
            scale_file = storage_test_utils.create_file(
                file_name='my_file_%s.txt' % index, workspace=workspace)
            created[index] = scale_file
            new_paths[index] = os.path.join(
                'my', 'new', 'path', index,
                os.path.basename(scale_file.file_path))
        workspace_a.move_files = MagicMock()
        workspace_b.move_files = MagicMock()

        all_moves = [FileMove(created[i], new_paths[i])
                     for i in ('1', '2', '3', '4')]
        ScaleFile.objects.move_files(all_moves)

        # The manager must group the moves by workspace and call each
        # workspace exactly once with only its own files
        workspace_a.move_files.assert_called_once_with(
            [FileMove(created['1'], new_paths['1']),
             FileMove(created['2'], new_paths['2'])])
        workspace_b.move_files.assert_called_once_with(
            [FileMove(created['3'], new_paths['3']),
             FileMove(created['4'], new_paths['4'])])
Ejemplo n.º 6
0
    def test_move_source_file(self, mock_move_files):
        """Tests calling save_parse_results so that the source file is moved to a different path in the workspace"""

        target_path = os.path.join('the', 'new', 'workspace', 'path',
                                   self.src_file.file_name)

        # Call method to test
        SourceFile.objects.save_parse_results(self.src_file.id, None, None,
                                              None, [], target_path)

        # Saving parse results with a new workspace path should trigger
        # exactly one move of the source file to that path
        mock_move_files.assert_called_once_with(
            [FileMove(self.src_file, target_path)])
Ejemplo n.º 7
0
    def handle(self, file_id, remote_path, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the file move process.

        :param file_id: The ID of the ScaleFile model to move
        :type file_id: int
        :param remote_path: The new workspace path for the file
        :type remote_path: string
        """

        logger.info('Command starting: scale_move_file')

        # Attempt to fetch the file model
        try:
            scale_file = ScaleFile.objects.get(pk=file_id)
        except ScaleFile.DoesNotExist:
            logger.exception('Stored file does not exist: %s', file_id)
            sys.exit(1)

        try:
            ScaleFile.objects.move_files([FileMove(scale_file, remote_path)])
        # Catch Exception rather than a bare except so SystemExit and
        # KeyboardInterrupt still propagate normally
        except Exception:
            logger.exception('Unknown error occurred, exit code 1 returning')
            sys.exit(1)
        logger.info('Command completed: scale_move_file')
Ejemplo n.º 8
0
def move_files(file_ids, new_workspace=None, new_file_path=None):
    """Moves the given files to a different workspace/uri

    If a new workspace is given, the files are copied into it (downloading
    first when the broker cannot provide direct file system paths) and the
    originals are deleted from the old workspace. If only a new file path is
    given, the files are moved within their current workspace. On failure the
    process exits with the appropriate error exit code.

    :param file_ids: List of ids of ScaleFile objects to move; should all be from the same workspace
    :type file_ids: [int]
    :param new_workspace: New workspace to move files to
    :type new_workspace: `storage.models.Workspace`
    :param new_file_path: New path for files
    :type new_file_path: string
    """

    try:
        messages = []
        files = ScaleFile.objects.all()
        files = files.select_related('workspace')
        files = files.defer('workspace__json_config')
        files = files.filter(id__in=file_ids).only('id', 'file_name',
                                                   'file_path', 'workspace')
        old_files = []
        old_workspace = files[0].workspace
        if new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary

            # BUG FIX: pass the files themselves, not a one-element list
            # containing the queryset, so the broker returns one path per file
            paths = old_workspace.get_file_system_paths(list(files))
            local_paths = []
            if paths:
                local_paths = paths
            else:
                # Broker cannot provide direct paths; download each file to /tmp
                file_downloads = []
                for scale_file in files:
                    local_path = os.path.join('/tmp', scale_file.file_name)
                    file_downloads.append(FileDownload(scale_file, local_path,
                                                       False))
                    local_paths.append(local_path)
                ScaleFile.objects.download_files(file_downloads)

            uploads = []
            for scale_file, path in zip(files, local_paths):
                old_path = scale_file.file_path
                # Keep a lightweight record of the old name/path so the
                # originals can be deleted after a successful copy
                old_files.append(
                    ScaleFile(file_name=scale_file.file_name,
                              file_path=scale_file.file_path))
                scale_file.file_path = new_file_path if new_file_path else scale_file.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s',
                            old_path, scale_file.workspace.name,
                            scale_file.file_path, new_workspace.name)
                file_upload = FileUpload(scale_file, path)
                uploads.append(file_upload)
                message = create_move_file_message(file_id=scale_file.id)
                messages.append(message)

            ScaleFile.objects.upload_files(new_workspace, uploads)
        elif new_file_path:
            moves = []
            for scale_file in files:
                logger.info('Moving %s to %s in workspace %s',
                            scale_file.file_path, new_file_path,
                            scale_file.workspace.name)
                moves.append(FileMove(scale_file, new_file_path))
                message = create_move_file_message(file_id=scale_file.id)
                messages.append(message)

            ScaleFile.objects.move_files(moves)
        else:
            logger.info('No new workspace or file path. Doing nothing')

        CommandMessageManager().send_messages(messages)

        if new_workspace:
            # Copied files to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            old_workspace.delete_files(old_files, update_model=False)

    except ScaleError as err:
        err.log()
        sys.exit(err.exit_code)
    except Exception as ex:
        # Map known exception types to their error exit codes; anything else
        # is logged and exits with the general failure code
        exit_code = GENERAL_FAIL_EXIT_CODE
        err = get_error_by_exception(ex.__class__.__name__)
        if err:
            err.log()
            exit_code = err.exit_code
        else:
            logger.exception('Error performing move_files steps')
        sys.exit(exit_code)
Ejemplo n.º 9
0
def perform_ingest(ingest_id):
    """Performs the ingest for the given ingest ID

    Depending on the ingest configuration the source file is either copied to
    a new workspace, moved to a new path within its workspace, or simply
    registered in place. On any failure the ingest is marked ERRORED and the
    exception is re-raised.

    :param ingest_id: The ID of the ingest to perform
    :type ingest_id: int
    """

    ingest = _get_ingest(ingest_id)
    file_name = ingest.file_name

    # A previous run already finished this ingest - nothing left to do
    if ingest.status in ['INGESTED', 'DUPLICATE']:
        logger.warning('%s already marked %s, nothing to do', file_name,
                       ingest.status)
        return

    _start_ingest(ingest)
    # Only proceed when _start_ingest left the ingest actively INGESTING
    # (presumably it can resolve the ingest another way - TODO confirm)
    if ingest.status != 'INGESTING':
        return

    try:
        source_file = ingest.source_file
        if source_file.is_deleted:
            # Source file still marked as deleted, so we must copy/move/register the file
            source_file.set_basic_fields(file_name, ingest.file_size,
                                         ingest.media_type,
                                         ingest.get_data_type_tags())
            source_file.update_uuid(
                file_name)  # Add a stable identifier based on the file name
            source_file.workspace = ingest.workspace
            source_file.file_path = ingest.file_path
            # Reset the model to a live, not-yet-parsed state
            source_file.is_deleted = False
            source_file.is_parsed = False
            source_file.deleted = None
            source_file.parsed = None

            if ingest.new_workspace:
                # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
                # download the file and copy from there
                # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
                # download is not necessary
                paths = ingest.workspace.get_file_system_paths([source_file])
                if paths:
                    local_path = paths[0]
                else:
                    local_path = os.path.join('/tmp', file_name)
                    file_download = FileDownload(source_file, local_path,
                                                 False)
                    ScaleFile.objects.download_files([file_download])
                source_file.file_path = ingest.new_file_path if ingest.new_file_path else ingest.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s',
                            ingest.file_path, ingest.workspace.name,
                            source_file.file_path, ingest.new_workspace.name)
                file_upload = FileUpload(source_file, local_path)
                ScaleFile.objects.upload_files(ingest.new_workspace,
                                               [file_upload])
            elif ingest.new_file_path:
                # Same workspace, new path: a broker-level move is sufficient
                logger.info('Moving %s to %s in workspace %s',
                            ingest.file_path, ingest.new_file_path,
                            ingest.workspace.name)
                file_move = FileMove(source_file, ingest.new_file_path)
                ScaleFile.objects.move_files([file_move])
            else:
                # No copy or move requested: register the file where it sits
                logger.info('Registering %s in workspace %s', ingest.file_path,
                            ingest.workspace.name)
                _save_source_file(source_file)

        if ingest.new_workspace:
            # Copied file to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            # A throwaway model with the OLD path is used for the lookup since
            # source_file now carries the new path
            file_with_old_path = SourceFile.create()
            file_with_old_path.file_name = file_name
            file_with_old_path.file_path = ingest.file_path
            paths = ingest.workspace.get_file_system_paths(
                [file_with_old_path])
            if paths:
                _delete_file(paths[0])

    except Exception:
        # Mark the ingest as failed before propagating the error
        _complete_ingest(ingest, 'ERRORED')
        raise

    _complete_ingest(ingest, 'INGESTED')
    logger.info('Ingest successful for %s', file_name)
Ejemplo n.º 10
0
    def save_parse_results(self, src_file_id, geo_json, data_started,
                           data_ended, data_types, new_workspace_path):
        """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param src_file_id: The ID of the source file
        :type src_file_id: int
        :param geo_json: The associated geojson data, possibly None
        :type geo_json: dict
        :param data_started: The start time of the data contained in the source file, possibly None
        :type data_started: :class:`datetime.datetime` or None
        :param data_ended: The end time of the data contained in the source file, possibly None
        :type data_ended: :class:`datetime.datetime` or None
        :param data_types: List of strings containing the data types tags for this source file.
        :type data_types: [string]
        :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
            None, the source file should not be moved.
        :type new_workspace_path: str
        """

        geom = None
        props = None
        if geo_json:
            geom, props = geo_utils.parse_geo_json(geo_json)

        # Acquire model lock
        src_file = ScaleFile.objects.select_for_update().get(
            pk=src_file_id, file_type='SOURCE')
        src_file.is_parsed = True
        src_file.parsed = now()
        # When only one end of the data time range is known, collapse the
        # range onto the known value so started/ended are always both set or
        # both None
        if data_started and not data_ended:
            src_file.data_started = data_started
            src_file.data_ended = data_started
        elif not data_started and data_ended:
            src_file.data_started = data_ended
            src_file.data_ended = data_ended
        elif not data_ended and not data_started:
            src_file.data_started = None
            src_file.data_ended = None
        else:
            src_file.data_started = data_started
            src_file.data_ended = data_ended
        src_file.source_started = src_file.data_started
        src_file.source_ended = src_file.data_ended
        for tag in data_types:
            src_file.add_data_type_tag(tag)
        if geom:
            src_file.geometry = geom
            src_file.center_point = geo_utils.get_center_point(geom)
        if props:
            src_file.meta_data = props
        # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
        src_file.set_countries()
        src_file.save()

        try:
            # Try to update corresponding ingest models with this file's data time
            from ingest.models import Ingest
            Ingest.objects.filter(source_file_id=src_file_id).update(
                data_started=data_started, data_ended=data_ended)
        except ImportError:
            pass

        # Move the source file if a new workspace path is provided and the workspace allows it
        # (the old path does not need to be retained here - no rollback occurs in this method)
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files(
                [FileMove(src_file, new_workspace_path)])
Ejemplo n.º 11
0
    def save_parse_results(self, src_file_id, geo_json, data_started,
                           data_ended, data_types, new_workspace_path):
        """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param src_file_id: The ID of the source file
        :type src_file_id: int
        :param geo_json: The associated geojson data, possibly None
        :type geo_json: dict
        :param data_started: The start time of the data contained in the source file, possibly None
        :type data_started: :class:`datetime.datetime` or None
        :param data_ended: The end time of the data contained in the source file, possibly None
        :type data_ended: :class:`datetime.datetime` or None
        :param data_types: List of strings containing the data types tags for this source file.
        :type data_types: [string]
        :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
            None, the source file should not be moved.
        :type new_workspace_path: str
        """

        geom = None
        props = None
        if geo_json:
            geom, props = geo_utils.parse_geo_json(geo_json)

        # Acquire model lock
        src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
        src_file.is_parsed = True
        src_file.parsed = now()
        src_file.data_started = data_started
        src_file.data_ended = data_ended
        for tag in data_types:
            src_file.add_data_type_tag(tag)
        if geom:
            src_file.geometry = geom
            src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props
        # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
        src_file.set_countries()
        src_file.save()

        # Remember the original path so the move below can be rolled back on error
        old_workspace_path = src_file.file_path
        # Move the source file if a new workspace path is provided and the workspace allows it
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files(
                [FileMove(src_file, new_workspace_path)])

        try:
            # Check trigger rules for parsed source files
            ParseTriggerHandler().process_parsed_source_file(src_file)
        except Exception:
            # Move file back if there was an error
            if new_workspace_path and src_file.workspace.is_move_enabled:
                ScaleFile.objects.move_files(
                    [FileMove(src_file, old_workspace_path)])
            raise