Esempio n. 1
0
    def test_create_existing_task(self):
        """
        Validates a task whose name refers to a task that exists.
        """

        existing_name = "ESSArch_Core.WorkflowEngine.tests.tasks.First"
        valid_task = ProcessTask(name=existing_name, responsible=self.user)

        # The test passes as long as validation does not raise
        valid_task.full_clean()
Esempio n. 2
0
    def test_running_non_eagerly(self):
        """
        Runs a task with ``CELERY_ALWAYS_EAGER`` turned off and checks that
        the value stored under the task's primary key in the result equals
        the ``foo`` parameter that was passed in.
        """

        # Restore the previous value once the test is over so the modified
        # global setting cannot leak into tests that run afterwards.
        self.addCleanup(
            setattr, settings, 'CELERY_ALWAYS_EAGER',
            getattr(settings, 'CELERY_ALWAYS_EAGER', False),
        )
        settings.CELERY_ALWAYS_EAGER = False
        foo = 123

        task = ProcessTask(
            name="ESSArch_Core.WorkflowEngine.tests.tasks.First",
            params={"foo": foo}
        )

        res = task.run().get().get(task.pk)
        self.assertEqual(res, foo)
Esempio n. 3
0
    def test_create_nonexistent_task(self):
        """
        Validation must reject a task whose name doesn't exist.
        """

        task_fields = {
            "name": "nonexistent task",
            "responsible": self.user,
        }

        with self.assertRaises(ValidationError):
            ProcessTask(**task_fields).full_clean()
Esempio n. 4
0
    def run(self,
            ip=None,
            xmlfile=None,
            validate_fileformat=True,
            validate_integrity=True,
            rootdir=None):
        """
        Creates a parallel "Validate Files" step, fills it with validation
        tasks for the files returned by ``find_files`` and runs the step.

        Args:
            ip: Primary key of the information package; set on every task
                and used to look up ``rootdir`` when that is not given
            xmlfile: Passed on to ``find_files`` together with ``rootdir``
            validate_fileformat: Queue a file format task for every file
                that has a format
            validate_integrity: Queue a checksum task for every file that
                has both a checksum and a checksum type
            rootdir: Directory that the file paths are joined onto

        Returns:
            The value of ``step.run().get()`` for the created step
        """
        validation_step = ProcessStep.objects.create(
            name="Validate Files",
            parallel=True,
            parent_step_id=self.step,
        )

        def build_task(task_name, task_params):
            # All validation tasks share package, responsible user and step
            return ProcessTask(
                name=task_name,
                params=task_params,
                information_package_id=ip,
                responsible_id=self.responsible,
                processstep=validation_step,
            )

        if validate_fileformat or validate_integrity:
            if rootdir is None:
                object_paths = InformationPackage.objects.values_list(
                    'object_path', flat=True)
                rootdir = object_paths.get(pk=ip)

            queued = []

            for entry in find_files(xmlfile, rootdir):
                if validate_fileformat and entry.format is not None:
                    queued.append(build_task(self.fileformat_task, {
                        "filename": os.path.join(rootdir, entry.path),
                        "format_name": entry.format,
                    }))

                if (validate_integrity and entry.checksum is not None
                        and entry.checksum_type is not None):
                    queued.append(build_task(self.checksum_task, {
                        "filename": os.path.join(rootdir, entry.path),
                        "checksum": entry.checksum,
                        "algorithm": entry.checksum_type,
                    }))

            ProcessTask.objects.bulk_create(queued)

        # The step is run even when no validation was requested
        with allow_join_result():
            return validation_step.run().get()
Esempio n. 5
0
    def local(self, src, dst, block_size=65536, step=None):
        """
        Copies ``src`` to ``dst`` through one ``CopyChunk`` task per block.

        A new step named after the copy is created below ``step``, the
        destination's directory is created if needed, the destination file
        is truncated and the chunk tasks are created in bulk before the
        step is run.

        Args:
            src: Path of the file to copy
            dst: Path of the destination file
            block_size: Distance in bytes between the offsets of two
                consecutive chunk tasks
            step: Id of the parent step of the step created here
        """
        step = ProcessStep.objects.create(name="Copy %s to %s" % (src, dst),
                                          parent_step_id=step)

        fsize = os.stat(src).st_size
        idx = 0

        tasks = []

        directory = os.path.dirname(dst)

        # A destination without a directory part yields '' here, and
        # os.makedirs('') fails with ENOENT, so only create real directories.
        if directory:
            try:
                os.makedirs(directory)
            except OSError as e:
                # An already existing directory is fine, anything else is not
                if e.errno != errno.EEXIST:
                    raise

        open(dst, 'w').close()  # remove content of destination if it exists

        # The bound is inclusive: a file whose size is an exact multiple of
        # block_size (including an empty file) gets a task at offset fsize.
        while idx * block_size <= fsize:
            tasks.append(
                ProcessTask(
                    name="ESSArch_Core.tasks.CopyChunk",
                    args=[src, dst, idx * block_size, self.task_id],
                    params={'block_size': block_size},
                    processstep=step,
                    processstep_pos=idx,
                ))
            idx += 1

        # Insert in batches of 1000 rows
        ProcessTask.objects.bulk_create(tasks, 1000)

        step.run().get()
Esempio n. 6
0
    def test_on_failure(self):
        """
        Runs an incorrect task and checks if the result is empty and that the
        traceback is nonempty.

        The task is given a ``bar`` parameter instead of ``foo`` and running
        it is expected to raise ``TypeError``; the test fails if it doesn't.
        """

        foo = 123
        # Built outside the try block so that ``task`` is always bound when
        # the except branch inspects it.
        task = ProcessTask(
            name="ESSArch_Core.WorkflowEngine.tests.tasks.First",
            params={
                "bar": foo
            },
            information_package=InformationPackage.objects.create()
        )

        try:
            task.run()
        except TypeError:
            tb = traceback.format_exc()
            self.assertEqual(tb, task.traceback)
            self.assertIsNone(task.result)
            self.assertIsNotNone(task.traceback)
        else:
            # Without this branch the test would pass without checking
            # anything whenever the task runs successfully.
            self.fail("Running the task with wrong parameters did not raise TypeError")
    def test_create_physical_model(self):
        """
        Runs CreatePhysicalModel and checks that the folders of the structure
        exist afterwards (and the file entry does not), then undoes the task
        and checks that the folders are gone.
        """
        ip = InformationPackage.objects.create(Label="ip1")
        prepare_root = Path.objects.get(entity="path_preingest_prepare").value
        ip_dir = os.path.join(prepare_root, unicode(ip.pk))

        structure = [
            {"name": "dir1", "type": "folder"},
            {"name": "dir2", "type": "folder"},
            {"name": "file1", "type": "file"},
        ]
        model_task = ProcessTask(
            name="preingest.tasks.CreatePhysicalModel",
            params={"structure": structure},
            information_package=ip,
        )
        model_task.run()

        for folder in ('dir1', 'dir2'):
            self.assertTrue(os.path.isdir(os.path.join(ip_dir, folder)))
        self.assertFalse(os.path.isfile(os.path.join(ip_dir, 'file1')))

        model_task.undo()

        for folder in ('dir1', 'dir2'):
            self.assertFalse(os.path.isdir(os.path.join(ip_dir, folder)))
    def test_create_zip(self):
        """
        Runs CreateZIP on a directory holding one empty file and checks that
        the archive appears next to the untouched directory. The directory is
        then deleted and the task undone; afterwards the directory and file
        are expected to exist again while the archive is gone.
        """
        prepare_root = Path.objects.get(entity="path_preingest_prepare").value

        # Directory to compress, containing a single empty file
        source_dir = os.path.join(prepare_root, "zipdir")
        os.makedirs(source_dir)
        member = os.path.join(source_dir, "file.txt")
        open(member, "a").close()

        archive = source_dir + ".zip"

        zip_task = ProcessTask(
            name="preingest.tasks.CreateZIP",
            params={"dirname": source_dir, "zipname": archive},
        )
        zip_task.run()

        self.assertTrue(os.path.isdir(source_dir))
        self.assertTrue(os.path.isfile(member))
        self.assertTrue(os.path.isfile(archive))

        shutil.rmtree(source_dir)
        zip_task.undo()

        self.assertTrue(os.path.isdir(source_dir))
        self.assertTrue(os.path.isfile(member))
        self.assertFalse(os.path.isfile(archive))
Esempio n. 9
0
    def test_receive_sip(self):
        """
        Places an empty tar and xml file for a new package in the ingest
        reception, runs ReceiveSIP and checks that both files show up in the
        ingest work directory; undoing the task must remove them again.
        """
        ip = InformationPackage.objects.create()

        # Empty source files named after the package
        for source in (os.path.join(self.ingest_reception, "%s.tar" % ip.pk),
                       os.path.join(self.ingest_reception, "%s.xml" % ip.pk)):
            open(source, "a").close()

        ip.ObjectPath = os.path.join(self.ingest_reception, str(ip.pk) + ".tar")
        ip.save()

        receive_task = ProcessTask(
            name="preingest.tasks.ReceiveSIP",
            params={"ip": ip},
        )
        receive_task.run()

        received = [
            os.path.join(self.ingest_work, str(ip.pk) + ".tar"),
            os.path.join(self.ingest_work, str(ip.pk) + ".xml"),
        ]
        for target in received:
            self.assertTrue(os.path.isfile(target))

        receive_task.undo()

        for target in received:
            self.assertFalse(os.path.isfile(target))
Esempio n. 10
0
    def create(self, validated_data):
        """
        Creates and starts one StorageMigration task per migratable storage
        method of every information package in ``validated_data``.

        Raises:
            ValueError: if a package has no migratable storage methods

        Returns:
            A queryset of the created tasks
        """
        migrations = []

        for package in validated_data['information_packages']:
            methods = package.get_migratable_storage_methods()
            if not methods.exists():
                raise ValueError('No storage methods available for migration')

            migrations.extend(
                ProcessTask(
                    name='ESSArch_Core.storage.tasks.StorageMigration',
                    label='Migrate to {}'.format(method),
                    args=[str(method.pk), validated_data['temp_path']],
                    information_package=package,
                    responsible=self.context['request'].user,
                    eager=False,
                )
                for method in methods
            )

        # Insert in batches of 100 rows before any task is started
        ProcessTask.objects.bulk_create(migrations, 100)

        for migration in migrations:
            migration.run()

        created_pks = [migration.pk for migration in migrations]
        return ProcessTask.objects.filter(pk__in=created_pks)
Esempio n. 11
0
def _create_on_error_tasks(errors, ip=None, responsible=None, eager=False, status=celery_states.PENDING):
    """
    Yields one unsaved ProcessTask per entry in ``errors``.

    Each entry must provide ``name``; ``reference``, ``label``, ``hidden``,
    ``args``, ``params`` and ``result_params`` are optional. The entry's
    index in ``errors`` becomes the task's position.
    """
    for position, spec in enumerate(errors):
        task_fields = {
            'name': spec['name'],
            'reference': spec.get('reference', None),
            'label': spec.get('label'),
            'hidden': spec.get('hidden', False),
            'args': spec.get('args', []),
            'params': spec.get('params', {}),
            'result_params': spec.get('result_params', {}),
            'eager': eager,
            'information_package': ip,
            'responsible': responsible,
            'processstep_pos': position,
            'status': status,
        }
        yield ProcessTask(**task_fields)
Esempio n. 12
0
    def identify_ip(self, request):
        """
        Generates an info XML file for an unidentified container file.

        The request's ``label`` names a file inside the directory stored
        under the ``path_ingest_unidentified`` path entity. A new UUID is
        generated for the package and ``specification_data`` is extended with
        that id, the label (moved from ``LABEL`` to ``_OBJLABEL``) and the
        creation date of the container before a GenerateXML task is run
        eagerly to write ``<uuid>.xml`` into the same directory.

        Returns:
            A 400 response when ``label`` is missing, points outside the
            directory, does not name an existing file, or when ``LABEL`` is
            missing from ``specification_data``; otherwise a response naming
            the created XML file.
        """
        fname = request.data.get('label')
        spec_data = request.data.get('specification_data', {})

        if not fname:
            return Response(
                {'status': 'No label specified'},
                status=status.HTTP_400_BAD_REQUEST
            )

        uip = Path.objects.get(entity="path_ingest_unidentified").value
        container_file = os.path.join(uip, fname)

        # The label comes straight from the request: refuse anything that
        # resolves outside of the directory (e.g. "../x" or absolute paths).
        allowed_root = os.path.join(os.path.realpath(uip), '')
        if not os.path.realpath(container_file).startswith(allowed_root):
            return Response(
                {'status': '%s is not a valid label' % fname},
                status=status.HTTP_400_BAD_REQUEST
            )

        if not os.path.isfile(container_file):
            return Response(
                {'status': '%s does not exist' % container_file},
                status=status.HTTP_400_BAD_REQUEST
            )

        # LABEL is moved to _OBJLABEL below, so it has to be present
        if 'LABEL' not in spec_data:
            return Response(
                {'status': 'LABEL is missing from specification_data'},
                status=status.HTTP_400_BAD_REQUEST
            )

        template_path = os.path.join(
            settings.BASE_DIR, 'templates/SDTemplate.json'
        )
        # Context manager so the template file handle is always closed
        with open(template_path) as template_file:
            spec = json.loads(template_file.read())

        ip_id = uuid.uuid4()

        spec_data['_OBJID'] = unicode(ip_id)
        spec_data['_OBJLABEL'] = spec_data.pop('LABEL')
        spec_data['_IP_CREATEDATE'] = timestamp_to_datetime(
            creation_date(container_file)
        ).isoformat()

        infoxml = u'%s.xml' % unicode(ip_id)
        infoxml = os.path.join(uip, infoxml)

        ProcessTask(
            name='preingest.tasks.GenerateXML',
            params={
                'info': spec_data,
                'filesToCreate': {
                    infoxml: spec
                },
                'folderToParse': container_file,
            },
        ).run_eagerly()

        return Response({'status': 'Identified IP, created %s' % infoxml})
    def test_create_ip_root_dir(self):
        """
        Runs CreateIPRootDir and checks that a directory named after the
        package's primary key exists below the prepare path; undoing the task
        must remove it again.
        """
        ip = InformationPackage.objects.create(Label="ip1")
        prepare_root = Path.objects.get(entity="path_preingest_prepare").value
        ip_root = os.path.join(prepare_root, unicode(ip.pk))

        root_task = ProcessTask(
            name="preingest.tasks.CreateIPRootDir",
            params={"information_package": ip},
        )

        root_task.run()
        self.assertTrue(os.path.isdir(ip_root))

        root_task.undo()
        self.assertFalse(os.path.isdir(ip_root))
    def test_prepare_ip(self):
        """
        Runs PrepareIP and checks that an information package with the given
        label exists afterwards and no longer exists once the task is undone.
        """
        label = "ip1"
        user = User.objects.create(username="******")

        def package_exists():
            # Queried anew on every call so each check sees the current state
            return InformationPackage.objects.filter(Label=label).exists()

        prepare_task = ProcessTask(
            name="preingest.tasks.PrepareIP",
            params={"label": label, "responsible": user},
            responsible=user,
        )

        prepare_task.run()
        self.assertTrue(package_exists())

        prepare_task.undo()
        self.assertFalse(package_exists())
    def test_submit_sip(self):
        """
        Places an empty tar and xml file for a package in the preingest
        reception, runs SubmitSIP and checks that both files exist in the
        ingest reception; undoing the task must remove them from there.
        """
        ip = InformationPackage.objects.create(Label="ip1")

        file_names = ["%s.tar" % ip.pk, "%s.xml" % ip.pk]
        sources = [
            os.path.join(self.preingest_reception, file_name)
            for file_name in file_names
        ]
        targets = [
            os.path.join(self.ingest_reception, file_name)
            for file_name in file_names
        ]

        # Empty source files are enough for the task to work with
        for source in sources:
            open(source, "a").close()

        submit_task = ProcessTask(
            name="preingest.tasks.SubmitSIP",
            params={"ip": ip},
        )
        submit_task.run()

        for target in targets:
            self.assertTrue(os.path.isfile(target))

        submit_task.undo()

        for target in targets:
            self.assertFalse(os.path.isfile(target))
Esempio n. 16
0
    def generate(self, folderToParse=None, algorithm='SHA-256'):
        """
        Writes every XML file listed in ``self.toCreate``.

        When ``folderToParse`` is given, a parallel step holding one
        ``ParseFile`` task per file is created first; the results yielded by
        ``step.chunk()`` are collected and handed to the XML elements as
        ``files``. Directories reported by ``self.find_external_dirs()`` get
        their own XML files through nested ``XMLGenerator`` instances and are
        excluded from the regular directory walk.

        Args:
            folderToParse: File or directory whose content is described in
                the generated XML; nothing is parsed when it is empty
            algorithm: Checksum algorithm name passed to every ParseFile task

        Note:
            The module-level tables of ``mimetypes`` are cleared and
            re-initialised from the file stored under the
            ``path_mimetypes_definitionfile`` path entity.
        """
        files = []

        # Start from empty tables so only the definition file is used
        mimetypes.suffix_map = {}
        mimetypes.encodings_map = {}
        mimetypes.types_map = {}
        mimetypes.common_types = {}
        mimetypes_file = Path.objects.get(
            entity="path_mimetypes_definitionfile").value
        mimetypes.init(files=[mimetypes_file])
        mtypes = mimetypes.types_map

        responsible = None

        if folderToParse:
            folderToParse = folderToParse.rstrip('/')
            step = ProcessStep.objects.create(
                name="File operations for %s" %
                (os.path.basename(folderToParse)),
                parallel=True,
            )

            tasks = []

            # Attach the new step below the step of the running task, if any
            if self.task is not None and self.task.step is not None:
                responsible = self.task.responsible
                step.parent_step_id = self.task.step
                step.save()

            folderToParse = unicode(folderToParse)

            # Triples of (pointer file name, directory, specification)
            external = self.find_external_dirs()

            for ext_file, ext_dir, ext_spec in external:
                # Second item of the first walk() result; presumably the
                # immediate sub directories, as with os.walk
                ext_sub_dirs = next(walk(os.path.join(folderToParse,
                                                      ext_dir)))[1]
                for sub_dir in ext_sub_dirs:
                    ptr_file_path = os.path.join(ext_dir, sub_dir, ext_file)

                    # NOTE(review): this is the same dict object as
                    # self.info, so the _EXT keys set here stay in self.info
                    # - confirm that this is intended
                    ext_info = self.info
                    ext_info['_EXT'] = sub_dir
                    ext_info['_EXT_HREF'] = ptr_file_path

                    external_gen = XMLGenerator(
                        filesToCreate={
                            os.path.join(folderToParse, ptr_file_path):
                            ext_spec
                        },
                        info=ext_info,
                        task=self.task,
                    )
                    external_gen.generate(
                        os.path.join(folderToParse, ext_dir, sub_dir))

                    # The pointer file written above is parsed like any
                    # other file of the main folder
                    tasks.append(
                        ProcessTask(
                            name="ESSArch_Core.tasks.ParseFile",
                            params={
                                'filepath':
                                os.path.join(folderToParse, ptr_file_path),
                                'mimetype':
                                self.get_mimetype(mtypes, ptr_file_path),
                                'relpath':
                                ptr_file_path,
                                'algorithm':
                                algorithm,
                                'rootdir':
                                sub_dir
                            },
                            responsible_id=responsible,
                            processstep=step,
                        ))

            if os.path.isfile(folderToParse):
                # A single file was given instead of a directory
                tasks.append(
                    ProcessTask(
                        name="ESSArch_Core.tasks.ParseFile",
                        params={
                            'filepath': folderToParse,
                            'mimetype':
                            self.get_mimetype(mtypes, folderToParse),
                            'relpath': os.path.basename(folderToParse),
                            'algorithm': algorithm
                        },
                        processstep=step,
                        responsible_id=responsible,
                    ))
            elif os.path.isdir(folderToParse):
                for root, dirnames, filenames in walk(folderToParse):
                    # Drop directories named like an external directory;
                    # assuming os.walk semantics this keeps walk() from
                    # descending into them
                    dirnames[:] = [
                        d for d in dirnames
                        if d not in [e[1] for e in external]
                    ]

                    for fname in filenames:
                        filepath = os.path.join(root, fname)
                        relpath = os.path.relpath(filepath, folderToParse)
                        tasks.append(
                            ProcessTask(
                                name="ESSArch_Core.tasks.ParseFile",
                                params={
                                    'filepath': filepath,
                                    'mimetype':
                                    self.get_mimetype(mtypes, filepath),
                                    'relpath': relpath,
                                    'algorithm': algorithm
                                },
                                responsible_id=responsible,
                                processstep=step,
                            ))

            # Insert in batches of 1000 rows
            ProcessTask.objects.bulk_create(tasks, 1000)

            with allow_join_result():
                for fileinfo in step.chunk():
                    files.append(fileinfo)

        for idx, f in enumerate(self.toCreate):
            fname = f['file']
            rootEl = f['root']

            self.info['_XML_FILENAME'] = os.path.basename(fname)

            tree = etree.ElementTree(
                rootEl.createLXMLElement(self.info,
                                         files=files,
                                         folderToParse=folderToParse,
                                         task=self.task))
            tree.write(fname,
                       pretty_print=True,
                       xml_declaration=True,
                       encoding='UTF-8')

            # Fall back to the full name when no relative path can be
            # computed (e.g. no folderToParse). Only regular errors are
            # caught so KeyboardInterrupt/SystemExit still propagate.
            try:
                relpath = os.path.relpath(fname, folderToParse)
            except Exception:
                relpath = fname

            # Every generated file except the last one is parsed as well and
            # added to ``files`` for the XML files that follow
            if idx < len(self.toCreate) - 1:
                parsefile_task = ProcessTask.objects.create(
                    name="ESSArch_Core.tasks.ParseFile",
                    params={
                        'filepath': fname,
                        'mimetype': self.get_mimetype(mtypes, fname),
                        'relpath': relpath,
                        'algorithm': algorithm
                    },
                    responsible_id=responsible,
                    processstep_id=self.task.step if self.task else None)

                with allow_join_result():
                    files.append(parsefile_task.run().get())
Esempio n. 17
0
    def remote(self,
               src,
               dst,
               requests_session=None,
               block_size=65536,
               step=None):
        """
        Uploads ``src`` to the URL ``dst`` through ``CopyChunk`` tasks and
        then posts the file's MD5 checksum to the completion URL.

        The first chunk task is run on its own because its result is used as
        the upload id of all following chunk tasks, which are created in bulk
        and started by resuming the step.

        Args:
            src: Path of the file to upload
            dst: URL that the chunks are sent to; the completion URL is
                derived from it by appending ``_complete/``
            requests_session: Session handed to the chunk tasks and used to
                post the completion request
            block_size: Distance in bytes between the offsets of two
                consecutive chunk tasks
            step: Id of the parent step of the step created here

        Raises:
            Whatever ``response.raise_for_status()`` raises for the
            completion request
        """
        step = ProcessStep.objects.create(name="Copy %s to %s" % (src, dst),
                                          parent_step_id=step)

        file_size = os.stat(src).st_size

        def chunk_fields(position, **extra_params):
            # Fields of the CopyChunk task starting at the given position
            chunk_params = {
                'requests_session': requests_session,
                'file_size': file_size,
                'block_size': block_size,
            }
            chunk_params.update(extra_params)
            return {
                'name': "ESSArch_Core.tasks.CopyChunk",
                'args': [src, dst, position * block_size],
                'params': chunk_params,
                'processstep': step,
                'processstep_pos': position,
            }

        # The result of the first chunk identifies the upload
        first_chunk = ProcessTask.objects.create(**chunk_fields(0))
        upload_id = first_chunk.run().get()

        remaining = []
        position = 1
        while position * block_size <= file_size:
            remaining.append(
                ProcessTask(**chunk_fields(position, upload_id=upload_id)))
            position += 1

        ProcessTask.objects.bulk_create(remaining, 1000)

        step.resume().get()

        checksum_task = ProcessTask.objects.create(
            name="ESSArch_Core.tasks.CalculateChecksum",
            params={
                "filename": src,
                "block_size": block_size,
                "algorithm": 'MD5'
            },
            information_package_id=self.ip,
            responsible_id=self.responsible,
        )
        md5 = checksum_task.run().get()

        completion_url = dst.rstrip('/') + '_complete/'

        encoder = MultipartEncoder(fields={
            'path': os.path.basename(src),
            'upload_id': upload_id,
            'md5': md5,
        })

        response = requests_session.post(
            completion_url,
            data=encoder,
            headers={'Content-Type': encoder.content_type},
        )
        response.raise_for_status()
Esempio n. 18
0
    def run(self,
            filepath=None,
            mimetype=None,
            relpath=None,
            algorithm='SHA-256',
            rootdir=''):
        """
        Collects the information about one file that is returned as a dict.

        The checksum and the file format are determined by a
        ``CalculateChecksum`` and an ``IdentifyFileFormat`` task that are
        created and run here.

        Args:
            filepath: Path of the file to describe
            mimetype: Stored as is under ``FMimetype``
            relpath: Path used for ``href`` and ``FName``; ``filepath`` is
                used when it is empty
            algorithm: Checksum algorithm, also stored as ``FChecksumType``
            rootdir: Stored as is under ``FDir``

        Returns:
            A dict describing the file
        """
        relpath = win_to_posix(relpath or filepath)

        createdate = timestamp_to_datetime(creation_date(filepath))

        def subtask(task_name, task_params):
            # Both sub tasks belong to the step, user and package of this task
            return ProcessTask(
                name=task_name,
                params=task_params,
                processstep_id=self.step,
                responsible_id=self.responsible,
                information_package_id=self.ip)

        checksum_task = subtask(
            "ESSArch_Core.tasks.CalculateChecksum",
            {"filename": filepath, "algorithm": algorithm})
        fileformat_task = subtask(
            "ESSArch_Core.tasks.IdentifyFileFormat",
            {"filename": filepath})

        ProcessTask.objects.bulk_create([checksum_task, fileformat_task])

        checksum = checksum_task.run().get()
        self.set_progress(50, total=100)
        format_result = fileformat_task.run().get()
        format_name, format_version, format_registry_key = format_result

        return {
            'FName': os.path.basename(relpath),
            'FDir': rootdir,
            'FChecksum': checksum,
            'FID': str(uuid.uuid4()),
            'daotype': "borndigital",
            'href': relpath,
            'FMimetype': mimetype,
            'FCreated': createdate.isoformat(),
            'FFormatName': format_name,
            'FFormatVersion': format_version,
            'FFormatRegistryKey': format_registry_key,
            'FSize': str(os.path.getsize(filepath)),
            'FUse': 'Datafile',
            'FChecksumType': algorithm,
            'FLoctype': 'URL',
            'FLinkType': 'simple',
            'FChecksumLib': 'hashlib',
            'FLocationType': 'URI',
            'FIDType': 'UUID',
        }