Example #1
    def purge_run(self, event):
        """Run purge for the object with ``data_id`` specified in ``event`` argument."""
        data_id = event['data_id']
        verbosity = event['verbosity']

        try:
            logger.info(__("Running purge for Data id {}.", data_id))
            data_purge(data_ids=[data_id], verbosity=verbosity, delete=True)
        except Exception:  # pylint: disable=broad-except
            logger.exception("Error while purging data object.",
                             extra={'data_id': data_id})
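A minimal driver sketch for the handler above; the event keys mirror the ones it reads, while the manager instance and the id value are hypothetical:

    # Hypothetical driver code: 'manager' is assumed to be an instance of the
    # class that defines purge_run() above.
    event = {'data_id': 42, 'verbosity': 1}
    manager.purge_run(event)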
Example #2
    def create_and_run_processor(self, processor, **kwargs):
        processor_slug = get_random_string(6)
        Process.objects.create(slug=processor_slug,
                               name='Test Purge Process',
                               contributor=self.admin,
                               type='data:test',
                               version=1,
                               **processor)

        data = self.run_process(processor_slug, **kwargs)
        # Purge is normally called in an async worker, so we have to emulate the call.
        purge.data_purge(data_ids=[data.id], delete=True)

        return data
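The helper above emulates the purge that would normally run in an async worker. A hedged variant of the same call (argument values are illustrative): with delete left at its default of False, data_purge removes nothing, as the tests below confirm.

    # Illustrative only: dry run first, then the destructive call used above.
    purge.data_purge(data_ids=[data.id], delete=False)
    purge.data_purge(data_ids=[data.id], delete=True)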
Example #3
    def test_remove(self, manager_mock):
        user = get_user_model().objects.create(username="******")
        processor = Process.objects.create(
            name='Test process',
            contributor=user,
            output_schema=[
                {'name': 'sample', 'type': 'basic:file:'}
            ]
        )

        data = {
            'name': 'Test data',
            'contributor': user,
            'process': processor,
        }

        completed_data = Data.objects.create(**data)
        completed_data.status = Data.STATUS_DONE
        completed_data.output = {'sample': {'file': 'test-file'}}
        self.create_test_file(completed_data, 'test-file')
        self.create_test_file(completed_data, 'removeme')
        completed_data.save()

        pending_data = Data.objects.create(**data)
        self.create_test_file(pending_data, 'test-file')
        self.create_test_file(pending_data, 'donotremoveme')

        # Check that nothing is removed if delete is False (the default).
        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge()
            os_mock.remove.assert_not_called()

        # Check that only the 'removeme' file from the completed Data object is removed
        # and that files from the second (not completed) Data object are unchanged.
        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge(delete=True)
            os_mock.remove.assert_called_once_with(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(completed_data.pk), 'removeme'))

        # Create another Data object and check that if purge is called on one object,
        # only that object's files are removed.
        another_data = Data.objects.create(**data)
        another_data.status = Data.STATUS_DONE
        another_data.output = {'sample': {'file': 'test-file'}}
        self.create_test_file(another_data, 'test-file')
        self.create_test_file(another_data, 'removeme')
        another_data.save()

        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge(data_ids=[another_data.pk], delete=True)
            os_mock.remove.assert_called_once_with(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(another_data.pk), 'removeme'))
Example #4
    def handle_finish(self, obj):
        """Handle an incoming ``Data`` finished processing request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'finish',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data` object
                               this command changes],
                    'process_rc': [exit status of the processing],
                    'spawn_processes': [optional; list of spawn dictionaries],
                    'exported_files_mapper': [if spawn_processes present]
                }
        """
        data_id = obj[ExecutorProtocol.DATA_ID]
        logger.debug(__("Finishing Data with id {} (handle_finish).", data_id),
                     extra={
                         'data_id': data_id,
                         'packet': obj
                     })

        with transaction.atomic():
            # Spawn any new jobs in the request.
            spawned = False
            if ExecutorProtocol.FINISH_SPAWN_PROCESSES in obj:
                if is_testing():
                    # NOTE: This is a work-around for Django issue #10827
                    # (https://code.djangoproject.com/ticket/10827), same as in
                    # TestCaseHelpers._pre_setup(). Because the listener is running
                    # independently, it must clear the cache on its own.
                    ContentType.objects.clear_cache()

                spawned = True
                exported_files_mapper = obj[ExecutorProtocol.FINISH_EXPORTED_FILES]
                logger.debug(
                    __("Spawning new Data objects for Data with id {} (handle_finish).",
                       data_id),
                    extra={'data_id': data_id})

                try:
                    # This transaction is needed because we're running
                    # asynchronously with respect to the main Django code
                    # here; the manager can get nudged from elsewhere.
                    with transaction.atomic():
                        parent_data = Data.objects.get(pk=data_id)

                        # Spawn processes.
                        for d in obj[ExecutorProtocol.FINISH_SPAWN_PROCESSES]:
                            d['contributor'] = parent_data.contributor
                            d['process'] = Process.objects.filter(
                                slug=d['process']).latest()

                            for field_schema, fields in iterate_fields(
                                    d.get('input', {}),
                                    d['process'].input_schema):
                                type_ = field_schema['type']
                                name = field_schema['name']
                                value = fields[name]

                                if type_ == 'basic:file:':
                                    fields[name] = self.hydrate_spawned_files(
                                        exported_files_mapper, value, data_id)
                                elif type_ == 'list:basic:file:':
                                    fields[name] = [
                                        self.hydrate_spawned_files(
                                            exported_files_mapper, fn, data_id)
                                        for fn in value
                                    ]

                            with transaction.atomic():
                                d = Data.objects.create(**d)
                                DataDependency.objects.create(
                                    parent=parent_data,
                                    child=d,
                                    kind=DataDependency.KIND_SUBPROCESS,
                                )

                                # Copy permissions.
                                copy_permissions(parent_data, d)

                                # Entity is added to the collection only when it is
                                # created - when it only contains 1 Data object.
                                entities = Entity.objects.filter(data=d).annotate(
                                    num_data=Count('data')).filter(num_data=1)

                                # Copy collections.
                                for collection in parent_data.collection_set.all():
                                    collection.data.add(d)

                                    # Add entities to which data belongs to the collection.
                                    for entity in entities:
                                        entity.collections.add(collection)

                except Exception:  # pylint: disable=broad-except
                    logger.error(
                        __("Error while preparing spawned Data objects of process '{}' (handle_finish):\n\n{}",
                           parent_data.process.slug, traceback.format_exc()),
                        extra={'data_id': data_id})

            # Data wrap up happens last, so that any triggered signals
            # already see the spawned children. What the children themselves
            # see is guaranteed by the transaction we're in.
            if ExecutorProtocol.FINISH_PROCESS_RC in obj:
                process_rc = obj[ExecutorProtocol.FINISH_PROCESS_RC]

                try:
                    d = Data.objects.get(pk=data_id)
                except Data.DoesNotExist:
                    logger.warning(
                        "Data object does not exist (handle_finish).",
                        extra={
                            'data_id': data_id,
                        })
                    async_to_sync(self._send_reply)(obj, {
                        ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR
                    })
                    return

                if process_rc == 0 and not d.status == Data.STATUS_ERROR:
                    changeset = {
                        'status': Data.STATUS_DONE,
                        'process_progress': 100,
                        'finished': now()
                    }
                else:
                    changeset = {
                        'status': Data.STATUS_ERROR,
                        'process_progress': 100,
                        'process_rc': process_rc,
                        'finished': now()
                    }
                obj[ExecutorProtocol.UPDATE_CHANGESET] = changeset
                self.handle_update(obj, internal_call=True)

                if not getattr(settings, 'FLOW_MANAGER_KEEP_DATA', False):
                    try:
                        # Clean up after process
                        data_purge(data_ids=[data_id],
                                   delete=True,
                                   verbosity=self._verbosity)
                    except Exception:  # pylint: disable=broad-except
                        logger.error(__("Purge error:\n\n{}",
                                        traceback.format_exc()),
                                     extra={'data_id': data_id})

        # Notify the executor that we're done.
        async_to_sync(self._send_reply)(
            obj, {
                ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK
            })

        # Now nudge the main manager to perform final cleanup. This is
        # needed even if there was no spawn baggage, since the manager
        # may need to know when executors have finished, to keep count
        # of them and manage synchronization.
        async_to_sync(consumer.send_event)({
            WorkerProtocol.COMMAND: WorkerProtocol.FINISH,
            WorkerProtocol.DATA_ID: data_id,
            WorkerProtocol.FINISH_SPAWNED: spawned,
            WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                'executor': getattr(settings, 'FLOW_EXECUTOR', {}).get(
                    'NAME', 'resolwe.flow.executors.local'),
            },
        })
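For reference, a hedged sketch of the 'finish' message this handler consumes; the field names come from the docstring's command format, the id and return code are made up, and it assumes the ExecutorProtocol constants resolve to these string keys:

    # 'listener' is assumed to be the consumer instance that defines handle_finish().
    obj = {
        'command': 'finish',
        'data_id': 42,
        'process_rc': 0,
    }
    listener.handle_finish(obj)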
Example #5
    def test_remove(self, manager_mock):
        user = get_user_model().objects.create(username="******")
        processor = Process.objects.create(name='Test process',
                                           contributor=user,
                                           output_schema=[{
                                               'name': 'sample',
                                               'type': 'basic:file:'
                                           }])

        data = {
            'name': 'Test data',
            'contributor': user,
            'process': processor,
        }

        completed_data = Data.objects.create(**data)
        completed_data.status = Data.STATUS_DONE
        completed_data.output = {'sample': {'file': 'test-file'}}
        self.create_test_file(completed_data, 'test-file')
        self.create_test_file(completed_data, 'removeme')
        completed_data.save()

        pending_data = Data.objects.create(**data)
        self.create_test_file(pending_data, 'test-file')
        self.create_test_file(pending_data, 'donotremoveme')

        # Check that nothing is removed if delete is False (the default).
        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge()
            os_mock.remove.assert_not_called()

        # Check that only the 'removeme' file from the completed Data object is removed
        # and that files from the second (not completed) Data object are unchanged.
        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge(delete=True)
            os_mock.remove.assert_called_once_with(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'],
                             str(completed_data.pk), 'removeme'))

        # Create dummy data directories for non-existent Data objects.
        self.create_test_file(990, 'dummy')
        self.create_test_file(991, 'dummy')

        # Check that only the 'removeme' file from the completed Data object is removed,
        # together with directories that do not belong to any Data object.
        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge(delete=True)
            self.assertEqual(os_mock.remove.call_count, 3)
            os_mock.remove.assert_any_call(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'],
                             str(completed_data.pk), 'removeme'))
            os_mock.remove.assert_any_call(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], '990'))
            os_mock.remove.assert_any_call(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], '991'))

        # Create another Data object and check that if purge is called on one object,
        # only that object's files are removed.
        another_data = Data.objects.create(**data)
        another_data.status = Data.STATUS_DONE
        another_data.output = {'sample': {'file': 'test-file'}}
        self.create_test_file(another_data, 'test-file')
        self.create_test_file(another_data, 'removeme')
        another_data.save()

        with patch('resolwe.flow.utils.purge.os', wraps=os) as os_mock:
            os_mock.path.isfile = MagicMock(return_value=True)
            os_mock.remove = MagicMock()
            purge.data_purge(data_ids=[another_data.pk], delete=True)
            os_mock.remove.assert_called_once_with(
                os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'],
                             str(another_data.pk), 'removeme'))
Example #6
    def run(self, data_id, script, verbosity=1):
        """Execute the script and save results."""
        if verbosity >= 1:
            print('RUN: {} {}'.format(data_id, script))

        self.data_id = data_id

        # Fetch data instance to get any executor requirements.
        self.requirements = Data.objects.get(pk=data_id).process.requirements.get(
            'executor', {}).get(self.name, {})

        data_dir = settings.FLOW_EXECUTOR['DATA_DIR']
        dir_mode = getattr(settings, 'FLOW_EXECUTOR', {}).get('DATA_DIR_MODE', 0o755)

        output_path = os.path.join(data_dir, str(data_id))

        os.mkdir(output_path)
        # os.mkdir is not guaranteed to set the given mode
        os.chmod(output_path, dir_mode)
        os.chdir(output_path)

        log_file = open('stdout.txt', 'w+')
        json_file = open('jsonout.txt', 'w+')

        proc_pid = self.start()

        self.update_data_status(status=Data.STATUS_PROCESSING,
                                started=now(),
                                process_pid=proc_pid)

        # Run processor and handle intermediate results
        self.run_script(script)
        spawn_processors = []
        output = {}
        process_error, process_warning, process_info = [], [], []
        process_progress, process_rc = 0, 0

        # read processor output
        try:
            stdout = self.get_stdout()
            while True:
                line = stdout.readline()
                if not line:
                    break

                try:
                    if line.strip().startswith('run'):
                        # Save processor and spawn if no errors
                        log_file.write(line)
                        log_file.flush()

                        for obj in iterjson(line[3:].strip()):
                            spawn_processors.append(obj)
                    elif line.strip().startswith('export'):
                        file_name = line[6:].strip()

                        export_folder = settings.FLOW_EXECUTOR['UPLOAD_DIR']
                        unique_name = 'export_{}'.format(uuid.uuid4().hex)
                        export_path = os.path.join(export_folder, unique_name)

                        self.exported_files_mapper[self.data_id][file_name] = unique_name

                        shutil.move(file_name, export_path)
                    else:
                        # If JSON, save to MongoDB
                        updates = {}
                        for obj in iterjson(line):
                            for key, val in six.iteritems(obj):
                                if key.startswith('proc.'):
                                    if key == 'proc.error':
                                        process_error.append(val)
                                        if not process_rc:
                                            process_rc = 1
                                            updates['process_rc'] = process_rc
                                        updates['process_error'] = process_error
                                        updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.warning':
                                        process_warning.append(val)
                                        updates['process_warning'] = process_warning
                                    elif key == 'proc.info':
                                        process_info.append(val)
                                        updates['process_info'] = process_info
                                    elif key == 'proc.rc':
                                        process_rc = int(val)
                                        updates['process_rc'] = process_rc
                                        if process_rc != 0:
                                            updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.progress':
                                        process_progress = int(float(val) * 100)
                                        updates['process_progress'] = process_progress
                                else:
                                    dict_dot(output, key, val)
                                    updates['output'] = output

                        if updates:
                            updates['modified'] = now()
                            self.update_data_status(**updates)

                        if process_rc > 0:
                            log_file.close()
                            json_file.close()
                            os.chdir(CWD)
                            return

                        # Debug output
                        # Not referenced in Data object
                        json_file.write(line)
                        json_file.flush()

                except ValueError as ex:
                    # Ignore if not JSON
                    log_file.write(line)
                    log_file.flush()

        except MemoryError as ex:
            logger.error(__("Out of memory: {}", ex))

        except IOError as ex:
            # TODO: if ex.errno == 28: no more free space
            raise ex
        finally:
            # Store results
            log_file.close()
            json_file.close()
            os.chdir(CWD)

        return_code = self.end()

        if process_rc < return_code:
            process_rc = return_code

        if spawn_processors and process_rc == 0:
            parent_data = Data.objects.get(pk=self.data_id)

            # Spawn processors
            for d in spawn_processors:
                d['contributor'] = parent_data.contributor
                d['process'] = Process.objects.filter(slug=d['process']).order_by('version').last()

                for field_schema, fields in iterate_fields(
                        d.get('input', {}), d['process'].input_schema):
                    type_ = field_schema['type']
                    name = field_schema['name']
                    value = fields[name]

                    if type_ == 'basic:file:':
                        fields[name] = self.hydrate_spawned_files(
                            value, data_id)
                    elif type_ == 'list:basic:file:':
                        fields[name] = [
                            self.hydrate_spawned_files(fn, data_id)
                            for fn in value
                        ]

                with transaction.atomic():
                    d = Data.objects.create(**d)
                    for collection in parent_data.collection_set.all():
                        collection.data.add(d)

        if process_rc == 0:
            self.update_data_status(status=Data.STATUS_DONE,
                                    process_progress=100,
                                    finished=now())
        else:
            self.update_data_status(status=Data.STATUS_ERROR,
                                    process_progress=100,
                                    process_rc=process_rc,
                                    finished=now())

        try:
            # Cleanup after processor
            data_purge(data_ids=[data_id], delete=True, verbosity=verbosity)
        except:  # pylint: disable=bare-except
            logger.error(__("Purge error:\n\n{}", traceback.format_exc()))
Example #7
    def run_process(self,
                    process_slug,
                    input_={},
                    assert_status=Data.STATUS_DONE,
                    descriptor=None,
                    descriptor_schema=None,
                    verbosity=0,
                    tags=None):
        """Run the specified process with the given inputs.

        If input is a file, file path should be given relative to the
        ``tests/files`` directory of a Django application.
        If ``assert_status`` is given, check if
        :class:`~resolwe.flow.models.Data` object's status matches
        it after the process has finished.

        .. note::

            If you need to delay calling the manager, you must put the
            desired code in a ``with transaction.atomic()`` block.

        :param str process_slug: slug of the
            :class:`~resolwe.flow.models.Process` to run

        :param dict input_: :class:`~resolwe.flow.models.Process`'s
            input parameters

            .. note::

                You don't have to specify parameters with defined
                default values.

        :param str assert_status: desired status of the
            :class:`~resolwe.flow.models.Data` object

        :param dict descriptor: descriptor to set on the
            :class:`~resolwe.flow.models.Data` object

        :param dict descriptor_schema: descriptor schema to set on the
            :class:`~resolwe.flow.models.Data` object

        :param list tags: list of tags that will be added to the created
            :class:`~resolwe.flow.models.Data` object

        :return: object created by
            :class:`~resolwe.flow.models.Process`
        :rtype: ~resolwe.flow.models.Data

        """
        # Copy input_, to avoid mutation that would occur in ``mock_upload``
        input_ = input_.copy()

        # backward compatibility
        process_slug = slugify(process_slug.replace(':', '-'))

        # Enforce correct process tags.
        if getattr(settings, 'TEST_PROCESS_REQUIRE_TAGS',
                   False) and not self._preparation_stage:
            test = getattr(self, self._testMethodName)
            if not has_process_tag(test, process_slug):
                self.fail(
                    'Tried to run process with slug "{0}" outside of preparation_stage\n'
                    'block while test is not tagged for this process. Either tag the\n'
                    'test using tag_process decorator or move this under the preparation\n'
                    'stage block if this process is only used to prepare upstream inputs.\n'
                    '\n'
                    'To tag the test you can add the following decorator:\n'
                    '    @tag_process(\'{0}\')\n'
                    ''.format(process_slug))

        self._executed_processes.add(process_slug)

        process = Process.objects.filter(
            slug=process_slug).order_by('-version').first()

        if process is None:
            self.fail('No process with slug "{}"'.format(process_slug))

        def mock_upload(file_path):
            """Mock file upload."""
            def is_url(path):
                """Check if path is a URL."""
                validate = URLValidator()
                try:
                    validate(path)
                except (ValueError, ValidationError):
                    return False
                return True

            if is_url(file_path):
                return {
                    'file': file_path,
                    'file_temp': file_path,
                    'is_remote': True,
                }
            else:
                old_path = os.path.join(self.files_path, file_path)
                if not os.path.isfile(old_path):
                    raise RuntimeError('Missing file: {}'.format(old_path))

                file_temp = '{}_{}'.format(file_path, uuid.uuid4())
                upload_file_path = os.path.join(self.upload_dir, file_temp)
                # create directories needed by new_path
                upload_file_dir = os.path.dirname(upload_file_path)
                if not os.path.exists(upload_file_dir):
                    os.makedirs(upload_file_dir)

                shutil.copy2(old_path, upload_file_path)
                self._upload_files.append(upload_file_path)
                return {
                    'file': file_path,
                    'file_temp': file_temp,
                }

        for field_schema, fields in iterate_fields(input_,
                                                   process.input_schema):
            # copy referenced files to upload dir
            if field_schema['type'] == "basic:file:":
                fields[field_schema['name']] = mock_upload(
                    fields[field_schema['name']])
            elif field_schema['type'] == "list:basic:file:":
                file_list = [
                    mock_upload(file_path)
                    for file_path in fields[field_schema['name']]
                ]
                fields[field_schema['name']] = file_list

            # convert primary keys to strings
            if field_schema['type'].startswith('data:'):
                fields[field_schema['name']] = fields[field_schema['name']]
            if field_schema['type'].startswith('list:data:'):
                fields[field_schema['name']] = [
                    obj for obj in fields[field_schema['name']]
                ]

        data = Data.objects.create(input=input_,
                                   contributor=self.admin,
                                   process=process,
                                   slug=get_random_string(length=6),
                                   tags=tags or [],
                                   descriptor_schema=descriptor_schema,
                                   descriptor=descriptor or {})
        self.collection.data.add(data)

        # Fetch latest Data object from database
        data = Data.objects.get(pk=data.pk)

        if assert_status:
            if not transaction.get_autocommit() and assert_status == Data.STATUS_DONE:
                # We are in an atomic transaction block, hence the data object will not be done
                # until after the block. Therefore the expected status is resolving.
                assert_status = Data.STATUS_RESOLVING
            self.assertStatus(data, assert_status)

        # Purge is normally called in an async worker, so we have to emulate the call.
        purge.data_purge(data_ids=[data.id], delete=True)

        return data
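A minimal usage sketch for run_process() from a test method; the process slug and input are hypothetical:

    # Hypothetical slug and input; run_process() copies the inputs, uploads any
    # referenced files, creates the Data object and purges leftover files.
    data = self.run_process('my-process', {'src': 'reads.fastq'})
    self.assertEqual(data.status, Data.STATUS_DONE)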
Example #8
    def run(self, data_id, script, verbosity=1):
        """Execute the script and save results."""
        if verbosity >= 1:
            print('RUN: {} {}'.format(data_id, script))

        self.data_id = data_id

        data_dir = settings.FLOW_EXECUTOR['DATA_DIR']
        dir_mode = getattr(settings, 'FLOW_EXECUTOR', {}).get('DATA_DIR_MODE', 0o755)

        output_path = os.path.join(data_dir, str(data_id))

        os.mkdir(output_path)
        # os.mkdir is not guaranteed to set the given mode
        os.chmod(output_path, dir_mode)
        os.chdir(output_path)

        log_file = open('stdout.txt', 'w+')
        json_file = open('jsonout.txt', 'w+')

        proc_pid = self.start()

        self.update_data_status(
            status=Data.STATUS_PROCESSING,
            started=now(),
            process_pid=proc_pid
        )

        # Run processor and handle intermediate results
        self.run_script(script)
        spawn_processors = []
        output = {}
        process_error, process_warning, process_info = [], [], []
        process_progress, process_rc = 0, 0

        # read processor output
        try:
            stdout = self.get_stdout()
            while True:
                line = stdout.readline()
                if not line:
                    break

                try:
                    if line.strip().startswith('run'):
                        # Save processor and spawn if no errors
                        log_file.write(line)
                        log_file.flush()

                        for obj in iterjson(line[3:].strip()):
                            spawn_processors.append(obj)
                    elif line.strip().startswith('export'):
                        file_name = line[6:].strip()

                        export_folder = settings.FLOW_EXECUTOR['UPLOAD_DIR']
                        unique_name = 'export_{}'.format(uuid.uuid4().hex)
                        export_path = os.path.join(export_folder, unique_name)

                        EXPORTED_FILES_MAPPER[file_name] = unique_name

                        shutil.move(file_name, export_path)
                    else:
                        # If JSON, save to MongoDB
                        updates = {}
                        for obj in iterjson(line):
                            for key, val in six.iteritems(obj):
                                if key.startswith('proc.'):
                                    if key == 'proc.error':
                                        process_error.append(val)
                                        if not process_rc:
                                            process_rc = 1
                                            updates['process_rc'] = process_rc
                                        updates['process_error'] = process_error
                                        updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.warning':
                                        process_warning.append(val)
                                        updates['process_warning'] = process_warning
                                    elif key == 'proc.info':
                                        process_info.append(val)
                                        updates['process_info'] = process_info
                                    elif key == 'proc.rc':
                                        process_rc = int(val)
                                        updates['process_rc'] = process_rc
                                        if process_rc != 0:
                                            updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.progress':
                                        process_progress = int(float(val) * 100)
                                        updates['process_progress'] = process_progress
                                else:
                                    dict_dot(output, key, val)
                                    updates['output'] = output

                        if updates:
                            updates['modified'] = now()
                            self.update_data_status(**updates)

                        if process_rc > 0:
                            log_file.close()
                            json_file.close()
                            os.chdir(CWD)
                            return

                        # Debug output
                        # Not referenced in Data object
                        json_file.write(line)
                        json_file.flush()

                except ValueError as ex:
                    # Ignore if not JSON
                    log_file.write(line)
                    log_file.flush()

        except MemoryError as ex:
            logger.error(__("Out of memory: {}", ex))

        except IOError as ex:
            # TODO: if ex.errno == 28: no more free space
            raise ex
        finally:
            # Store results
            log_file.close()
            json_file.close()
            os.chdir(CWD)

        return_code = self.end()

        if process_rc < return_code:
            process_rc = return_code

        if spawn_processors and process_rc == 0:
            parent_data = Data.objects.get(pk=self.data_id)

            # Spawn processors
            for d in spawn_processors:
                d['contributor'] = parent_data.contributor
                d['process'] = Process.objects.filter(slug=d['process']).order_by('version').last()

                for field_schema, fields in iterate_fields(d.get('input', {}), d['process'].input_schema):
                    type_ = field_schema['type']
                    name = field_schema['name']
                    value = fields[name]

                    if type_ == 'basic:file:':
                        fields[name] = hydrate_spawned_files(value, data_id)
                    elif type_ == 'list:basic:file:':
                        fields[name] = [hydrate_spawned_files(fn, data_id) for fn in value]

                with transaction.atomic():
                    d = Data.objects.create(**d)
                    for collection in parent_data.collection_set.all():
                        collection.data.add(d)

        if process_rc == 0:
            self.update_data_status(
                status=Data.STATUS_DONE,
                process_progress=100,
                finished=now()
            )
        else:
            self.update_data_status(
                status=Data.STATUS_ERROR,
                process_progress=100,
                process_rc=process_rc,
                finished=now()
            )

        try:
            # Cleanup after processor
            if data_id != 'no_data_id':
                data_purge(data_ids=[data_id], delete=True, verbosity=verbosity)
        except:  # pylint: disable=bare-except
            logger.error(__("Purge error:\n\n{}", traceback.format_exc()))
Example #9
    def handle(self, *args, **options):
        """Call :func:`~resolwe.flow.utils.purge.data_purge`."""
        data_purge(options['data'], options['force'])
Example #10
    def handle(self, *args, **options):
        """Call :func:`~resolwe.flow.utils.purge.data_purge`."""
        data_purge(options['data'], options['force'], options['verbosity'])
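A hedged sketch of the management command these handle() methods could belong to; only the option keys ('data', 'force', and Django's built-in 'verbosity') come from the calls above, while the command class, flag names, and help texts are assumptions:

    from django.core.management.base import BaseCommand

    from resolwe.flow.utils.purge import data_purge


    class Command(BaseCommand):
        """Purge files that are no longer referenced by Data objects."""

        help = "Purge unreferenced files from the data directory."

        def add_arguments(self, parser):
            # Flag names are illustrative; the real command may define them differently.
            parser.add_argument('--data', nargs='*', type=int,
                                help="Limit the purge to these Data ids.")
            parser.add_argument('--force', action='store_true',
                                help="Actually delete the files.")

        def handle(self, *args, **options):
            """Call :func:`~resolwe.flow.utils.purge.data_purge`."""
            data_purge(options['data'], options['force'], options['verbosity'])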