Exemple #1
0
    def test_success(self, mock_getsize):
        """Tests calling ScaleFileManager.upload_files() successfully"""
        # Every file reports a fixed size of 100 bytes
        mock_getsize.side_effect = lambda path: 100

        workspace = storage_test_utils.create_workspace()

        first_file = ScaleFile()
        first_file.set_basic_fields('file.txt', 100, None)  # Scale should auto-detect text/plain
        first_remote = 'my/remote/path/file.txt'
        first_local = 'my/local/path/file.txt'
        first_file.file_path = first_remote

        second_file = ScaleFile()
        second_file.set_basic_fields('file.json', 100, 'application/json')
        second_remote = 'my/remote/path/2/file.json'
        second_local = 'my/local/path/2/file.json'
        second_file.file_path = second_remote

        workspace.upload_files = MagicMock()

        uploads = [FileUpload(first_file, first_local), FileUpload(second_file, second_local)]
        models = ScaleFile.objects.upload_files(workspace, uploads)

        workspace.upload_files.assert_called_once_with([FileUpload(first_file, first_local),
                                                        FileUpload(second_file, second_local)])

        # Returned models should carry the expected names, paths, and media types
        expected = [('file.txt', first_remote, 'text/plain'),
                    ('file.json', second_remote, 'application/json')]
        for model, (name, path, media_type) in zip(models, expected):
            self.assertEqual(name, model.file_name)
            self.assertEqual(path, model.file_path)
            self.assertEqual(media_type, model.media_type)
            self.assertEqual(workspace.id, model.workspace_id)
Exemple #2
0
    def test_no_tags(self):
        """Tests calling get_data_type_tags() with no tags"""

        scale_file = ScaleFile()

        # A brand-new file should expose an empty tag set
        self.assertSetEqual(scale_file.get_data_type_tags(), set())
Exemple #3
0
    def test_objects(self):
        """Tests calling update_uuid with multiple object types."""

        scale_file = ScaleFile()
        scale_file.update_uuid('test.txt', 1, True, {'key': 'value'})

        # Mixed argument types must still hash to a stable 32-character UUID
        self.assertEqual(32, len(scale_file.uuid))
        self.assertEqual('ee6535359fbe02d50589a823951eb491', scale_file.uuid)
Exemple #4
0
    def test_multi_strings(self):
        """Tests calling update_uuid with multiple strings."""

        scale_file = ScaleFile()
        scale_file.update_uuid('test.txt', 'test1', 'test2')

        # Several string arguments must produce a stable 32-character UUID
        self.assertEqual(32, len(scale_file.uuid))
        self.assertEqual('8ff66acfc019330bba973b408c63ad15', scale_file.uuid)
Exemple #5
0
    def test_one_string(self):
        """Tests calling update_uuid with a single string."""

        scale_file = ScaleFile()
        scale_file.update_uuid('test.txt')

        # One argument is enough to derive a stable 32-character UUID
        self.assertEqual(32, len(scale_file.uuid))
        self.assertEqual('dd18bf3a8e0a2a3e53e2661c7fb53534', scale_file.uuid)
Exemple #6
0
    def test_tags(self):
        """Tests calling get_data_type_tags() with tags"""

        scale_file = ScaleFile(data_type='A,B,c')

        # The comma-separated data_type string should split into a tag set
        self.assertSetEqual(scale_file.get_data_type_tags(), {'A', 'B', 'c'})
Exemple #7
0
    def test_same_tag(self):
        """Tests calling add_data_type_tag() with the same tag twice"""

        scale_file = ScaleFile()
        # Adding a duplicate tag must be a no-op
        for _ in range(2):
            scale_file.add_data_type_tag('Hello-1')

        self.assertSetEqual(scale_file.get_data_type_tags(), {'Hello-1'})
Exemple #8
0
    def test_valid(self):
        """Tests calling add_data_type_tag() with valid tags"""

        scale_file = ScaleFile()
        # Hyphens, underscores, and mixed case are all legal tag characters
        for tag in ('Hello-1', 'foo_BAR'):
            scale_file.add_data_type_tag(tag)

        self.assertSetEqual(scale_file.get_data_type_tags(), {'Hello-1', 'foo_BAR'})
Exemple #9
0
    def test_invalid(self):
        """Tests calling add_data_type_tag() with invalid tags

        Both tags contain characters outside the allowed set, so each call
        must raise :class:`InvalidDataTypeTag`.
        """

        the_file = ScaleFile()

        self.assertRaises(InvalidDataTypeTag, the_file.add_data_type_tag, 'my.invalid.tag')
        # BUG FIX: the original non-raw literal 'my\invalid\tag!' turned '\t'
        # into a tab character; a raw string tests literal backslashes as intended
        self.assertRaises(InvalidDataTypeTag, the_file.add_data_type_tag, r'my\invalid\tag!')
Exemple #10
0
    def test_partial(self):
        """Tests calling update_uuid with some ignored None types."""

        file_without_nones = ScaleFile()
        file_without_nones.update_uuid('test.txt', 'test')

        file_with_nones = ScaleFile()
        file_with_nones.update_uuid('test.txt', None, 'test', None)

        # None arguments must not contribute to the computed UUID
        self.assertEqual(file_without_nones.uuid, file_with_nones.uuid)
Exemple #11
0
    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the file upload process: it validates the local
        file, looks up the destination workspace, creates or reuses the
        :class:`ScaleFile` model, and uploads the file. Exits the process
        with code 1 on any failure.
        """

        file_id = options.get('file_id')
        # BUG FIX: local_path was referenced below but never assigned
        # (assumes the command declares a 'local_path' argument — TODO confirm)
        local_path = options.get('local_path')
        remote_path = options.get('remote_path')
        workspace_name = options.get('workspace')

        logger.info('Command starting: scale_upload_file')
        logger.info(' - Workspace: %s', workspace_name)

        # Validate the file paths
        file_name = os.path.basename(local_path)
        if not os.path.exists(local_path):
            # logger.error, not logger.exception: there is no active exception here
            logger.error('Local file does not exist: %s', local_path)
            sys.exit(1)

        # Attempt to fetch the workspace model
        try:
            workspace = Workspace.objects.get(name=workspace_name)
        except Workspace.DoesNotExist:
            logger.exception('Workspace does not exist: %s', workspace_name)
            sys.exit(1)

        # Reuse an existing file model for this name, otherwise create one
        try:
            scale_file = ScaleFile.objects.get(file_name=file_name)
        except ScaleFile.DoesNotExist:
            scale_file = ScaleFile()
            scale_file.update_uuid(file_name)
        scale_file.file_path = remote_path

        try:
            ScaleFile.objects.upload_files(
                workspace, [FileUpload(scale_file, local_path)])
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
            logger.exception('Unknown error occurred, exit code 1 returning')
            sys.exit(1)
        logger.info('Command completed: scale_upload_file')
Exemple #12
0
    def test_fails(self, mock_makedirs, mock_getsize):
        """Tests calling ScaleFileManager.upload_files() when Workspace.upload_files() fails"""
        # Every file reports a fixed size of 100 bytes
        mock_getsize.side_effect = lambda path: 100

        upload_dir = os.path.join('upload', 'dir')
        work_dir = os.path.join('work', 'dir')

        workspace = storage_test_utils.create_workspace()

        first_file = ScaleFile()
        first_file.media_type = None  # Scale should auto-detect text/plain
        first_remote = 'my/remote/path/file.txt'
        first_local = 'my/local/path/file.txt'

        second_file = ScaleFile()
        second_file.media_type = 'application/json'
        second_remote = 'my/remote/path/2/file.json'
        second_local = 'my/local/path/2/file.json'

        # The workspace upload blows up, which must propagate to the caller
        workspace.upload_files = MagicMock(side_effect=Exception)
        workspace.delete_files = MagicMock()
        delete_work_dir = os.path.join(work_dir, 'delete', get_valid_filename(workspace.name))

        files = [(first_file, first_local, first_remote), (second_file, second_local, second_remote)]
        self.assertRaises(Exception, ScaleFile.objects.upload_files, upload_dir, work_dir, workspace, files)
Exemple #13
0
    def handle(self, local_path, remote_path, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the file upload process: it validates the local
        file, looks up the destination workspace, creates or reuses the
        :class:`ScaleFile` model, and uploads the file. Exits the process
        with code 1 on any failure.
        """

        workspace_name = options.get('workspace')

        logger.info('Command starting: scale_upload_file')
        logger.info(' - Workspace: %s', workspace_name)

        # Validate the file paths
        file_name = os.path.basename(local_path)
        if not os.path.exists(local_path):
            # logger.error, not logger.exception: there is no active exception here
            logger.error('Local file does not exist: %s', local_path)
            sys.exit(1)

        # Attempt to fetch the workspace model
        try:
            workspace = Workspace.objects.get(name=workspace_name)
        except Workspace.DoesNotExist:
            logger.exception('Workspace does not exist: %s', workspace_name)
            sys.exit(1)

        # Reuse an existing file model for this name, otherwise create one
        try:
            scale_file = ScaleFile.objects.get(file_name=file_name)
        except ScaleFile.DoesNotExist:
            scale_file = ScaleFile()
            scale_file.update_uuid(file_name)
        scale_file.file_path = remote_path

        try:
            ScaleFile.objects.upload_files(workspace, [FileUpload(scale_file, local_path)])
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
            logger.exception('Unknown error occurred, exit code 1 returning')
            sys.exit(1)
        logger.info('Command completed: scale_upload_file')
Exemple #14
0
    def test_success(self, mock_getsize):
        '''Tests calling ScaleFileManager.upload_files() successfully'''
        # Every file reports a fixed size of 100 bytes
        mock_getsize.side_effect = lambda path: 100

        upload_dir = os.path.join('upload', 'dir')
        work_dir = os.path.join('work', 'dir')

        workspace = storage_test_utils.create_workspace()

        first_file = ScaleFile()
        first_file.media_type = None  # Scale should auto-detect text/plain
        first_remote = u'my/remote/path/file.txt'
        first_local = u'my/local/path/file.txt'

        second_file = ScaleFile()
        second_file.media_type = u'application/json'
        second_remote = u'my/remote/path/2/file.json'
        second_local = u'my/local/path/2/file.json'

        workspace.upload_files = MagicMock()
        workspace.delete_files = MagicMock()
        workspace_work_dir = ScaleFile.objects._get_workspace_work_dir(work_dir, workspace)

        files = [(first_file, first_local, first_remote), (second_file, second_local, second_remote)]
        models = ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, files)

        workspace.upload_files.assert_called_once_with(upload_dir, workspace_work_dir,
                                                       [(first_local, first_remote), (second_local, second_remote)])
        # Nothing should have been deleted on the success path
        self.assertListEqual(workspace.delete_files.call_args_list, [])

        # Returned models should carry the expected names, paths, and media types
        expected = [(u'file.txt', first_remote, u'text/plain'),
                    (u'file.json', second_remote, u'application/json')]
        for model, (name, path, media_type) in zip(models, expected):
            self.assertEqual(name, model.file_name)
            self.assertEqual(path, model.file_path)
            self.assertEqual(media_type, model.media_type)
            self.assertEqual(workspace.id, model.workspace_id)
Exemple #15
0
    def test_success(self, mock_getsize):
        '''Tests calling ScaleFileManager.upload_files() successfully'''
        # Every file reports a fixed size of 100 bytes
        mock_getsize.side_effect = lambda path: 100

        upload_dir = os.path.join('upload', 'dir')
        work_dir = os.path.join('work', 'dir')

        workspace = storage_test_utils.create_workspace()

        file_a = ScaleFile()
        file_a.media_type = None  # Scale should auto-detect text/plain
        local_a, remote_a = u'my/local/path/file.txt', u'my/remote/path/file.txt'

        file_b = ScaleFile()
        file_b.media_type = u'application/json'
        local_b, remote_b = u'my/local/path/2/file.json', u'my/remote/path/2/file.json'

        workspace.upload_files = MagicMock()
        workspace.delete_files = MagicMock()
        workspace_work_dir = ScaleFile.objects._get_workspace_work_dir(work_dir, workspace)

        files = [(file_a, local_a, remote_a), (file_b, local_b, remote_b)]
        models = ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, files)

        workspace.upload_files.assert_called_once_with(
            upload_dir, workspace_work_dir,
            [(local_a, remote_a), (local_b, remote_b)])
        # Nothing should have been deleted on the success path
        self.assertListEqual(workspace.delete_files.call_args_list, [])

        self.assertEqual(u'file.txt', models[0].file_name)
        self.assertEqual(remote_a, models[0].file_path)
        self.assertEqual(u'text/plain', models[0].media_type)
        self.assertEqual(workspace.id, models[0].workspace_id)
        self.assertEqual(u'file.json', models[1].file_name)
        self.assertEqual(remote_b, models[1].file_path)
        self.assertEqual(u'application/json', models[1].media_type)
        self.assertEqual(workspace.id, models[1].workspace_id)
Exemple #16
0
def move_files(file_ids, new_workspace=None, new_file_path=None):
    """Moves the given files to a different workspace/uri

    If a new workspace is given, the files are copied into it (and then
    deleted from the old workspace); if only a new file path is given, the
    files are moved within their current workspace. A command message is
    sent for each affected file. Exits the process with an error code if
    any step fails.

    :param file_ids: List of ids of ScaleFile objects to move; should all be from the same workspace
    :type file_ids: [int]
    :param new_workspace: New workspace to move files to
    :type new_workspace: `storage.models.Workspace`
    :param new_file_path: New path for files
    :type new_file_path: string
    """

    try:
        messages = []
        files = ScaleFile.objects.all()
        files = files.select_related('workspace')
        files = files.defer('workspace__json_config')
        files = files.filter(id__in=file_ids).only('id', 'file_name',
                                                   'file_path', 'workspace')
        old_files = []
        if not files:
            # Guard: files[0] below would raise IndexError on an empty match
            logger.info('No files matched the given IDs. Doing nothing')
            return
        old_workspace = files[0].workspace
        if new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary

            # BUG FIX: pass the files themselves, not a one-element list
            # wrapping the queryset
            paths = old_workspace.get_file_system_paths(files)
            local_paths = []
            if paths:
                local_paths = paths
            else:
                file_downloads = []
                for scale_file in files:
                    local_path = os.path.join('/tmp', scale_file.file_name)
                    file_downloads.append(FileDownload(scale_file, local_path,
                                                       False))
                    local_paths.append(local_path)
                ScaleFile.objects.download_files(file_downloads)

            uploads = []
            for scale_file, path in zip(files, local_paths):
                old_path = scale_file.file_path
                # Remember the old location so it can be deleted after the copy
                old_files.append(
                    ScaleFile(file_name=scale_file.file_name,
                              file_path=scale_file.file_path))
                scale_file.file_path = new_file_path if new_file_path else scale_file.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s',
                            old_path, scale_file.workspace.name,
                            scale_file.file_path, new_workspace.name)
                uploads.append(FileUpload(scale_file, path))
                messages.append(create_move_file_message(file_id=scale_file.id))

            ScaleFile.objects.upload_files(new_workspace, uploads)
        elif new_file_path:
            moves = []
            for scale_file in files:
                logger.info('Moving %s to %s in workspace %s',
                            scale_file.file_path, new_file_path,
                            scale_file.workspace.name)
                moves.append(FileMove(scale_file, new_file_path))
                messages.append(create_move_file_message(file_id=scale_file.id))

            ScaleFile.objects.move_files(moves)
        else:
            logger.info('No new workspace or file path. Doing nothing')

        CommandMessageManager().send_messages(messages)

        if new_workspace:
            # Copied files to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            old_workspace.delete_files(old_files, update_model=False)

    except ScaleError as err:
        err.log()
        sys.exit(err.exit_code)
    except Exception as ex:
        # Map known exception types to Scale error codes; anything else is a
        # general failure
        exit_code = GENERAL_FAIL_EXIT_CODE
        err = get_error_by_exception(ex.__class__.__name__)
        if err:
            err.log()
            exit_code = err.exit_code
        else:
            logger.exception('Error performing move_files steps')
        sys.exit(exit_code)
Exemple #17
0
    def test_none(self):
        """Tests calling update_uuid with no arguments."""

        scale_file = ScaleFile()
        # At least one argument is required to compute a UUID
        with self.assertRaises(ValueError):
            scale_file.update_uuid()
Exemple #18
0
    def _generate_input_metadata(self, job_exe):
        """Generate the input metadata file for the job execution

        Collects the job's (and, if present, the recipe's) input values into
        a JSON manifest, writes it to a local temp file, uploads it into one
        of the input workspaces, and attaches the uploaded file to the job's
        input data as INPUT_METADATA_MANIFEST.

        :param job_exe: The job execution to generate the input metadata file for
        :type job_exe: :class:`job.models.JobExecution`
        """

        job_interface = job_exe.job_type.get_job_interface()

        if not job_interface.needs_input_metadata():
            return

        def collect_values(input_data):
            # Map each input name to its JSON value, or to the list of member
            # file URLs for file inputs; other value types are skipped
            values = {}
            for name in input_data.values.keys():
                value = input_data.values[name]
                if type(value) is JsonValue:
                    values[name] = value.value
                elif type(value) is FileValue:
                    values[name] = [ScaleFile.objects.get(pk=f)._get_url()
                                    for f in value.file_ids]
            return values

        # Generate input metadata dict
        input_metadata = {}
        # NOTE(review): no parentheses here — confirm get_execution_configuration
        # is a property/attribute rather than a method on this object
        config = job_exe.get_execution_configuration
        if 'input_files' in config.get_dict():
            input_metadata['JOB'] = collect_values(job_exe.job.get_input_data())
        if job_exe.recipe_id and job_exe.recipe.has_input():
            input_metadata['RECIPE'] = collect_values(job_exe.recipe.get_input_data())

        workspace_names = config.get_input_workspace_names()
        workspace_models = {
            w.name: w
            for w in Workspace.objects.get_workspaces(names=workspace_names)
        }

        input_metadata_id = None
        if input_metadata:
            file_name = '%d-input_metadata.json' % job_exe.job.id
            local_path = os.path.join(SCALE_JOB_EXE_INPUT_PATH, 'tmp',
                                      file_name)
            with open(local_path, 'w') as metadata_file:
                json.dump(input_metadata, metadata_file)
                try:
                    scale_file = ScaleFile.objects.get(file_name=file_name)
                except ScaleFile.DoesNotExist:
                    scale_file = ScaleFile()
                    scale_file.update_uuid(file_name)
                remote_path = self._calculate_remote_path(job_exe)
                scale_file.file_path = remote_path

                # BUG FIX: iterate the workspace models, not the dict's name
                # keys — upload_files() needs a Workspace model, not a string
                for workspace in workspace_models.values():
                    try:
                        if not input_metadata_id:
                            ScaleFile.objects.upload_files(
                                workspace,
                                [FileUpload(scale_file, local_path)])
                            input_metadata_id = ScaleFile.objects.get(
                                file_name=file_name).id
                            data = job_exe.job.get_job_data()
                            data.add_file_input('INPUT_METADATA_MANIFEST',
                                                input_metadata_id)
                            job_exe.job.input = data.get_dict()
                            job_exe.job.save()
                    except Exception:
                        # Best-effort: try the next workspace on failure
                        continue
                if not input_metadata_id:
                    # logger.error, not logger.exception: no active exception here
                    logger.error(
                        'Error uploading input_metadata manifest for job_exe %d'
                        % job_exe.job.id)