Beispiel #1
0
class ManifestTest(unittest.TestCase):
    """Tests that BagIt writes the correct manifest for each hash encoding."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))

    def set_hash_md5(self):
        # Helper (not auto-run: name lacks the ``test_`` prefix).
        self.bag.set_hash_encoding('md5')
        # Bug fix: the encoding lives on the bag, not the TestCase
        # (the original read ``self.hash_encoding``, which does not exist
        # here) -- TODO confirm the attribute name against BagIt.
        self.assertEqual(self.bag.hash_encoding, 'md5')

    def set_hash_sha1(self):
        # Helper (not auto-run: name lacks the ``test_`` prefix).
        self.bag.set_hash_encoding('sha1')
        self.assertEqual(self.bag.hash_encoding, 'sha1')

    def test_sha1(self):
        # The SHA-1 manifest records the expected digest for a known file.
        # Note: assertEquals is a deprecated alias (removed in Python 3.12);
        # assertEqual is used throughout.
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(self.bag.manifest_contents[os.path.join('data', 'subdir', 'subsubdir', 'angry.jpg')],
                'c5913ae67aa40398f1182e52d2fa2c2e4c08f696')

    def test_md5(self):
        # The MD5 manifest records the expected digest for a known file.
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(self.bag.manifest_contents[os.path.join('data', 'subdir', 'subsubdir', 'angry.jpg')],
                '5f294603675cb6c0f83cef9316bb5be7')

    def test_sha1_manifest(self):
        # The manifest file is named after the chosen encoding.
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                'manifest-sha1.txt')

    def test_md5_manifest(self):
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                'manifest-md5.txt')
Beispiel #2
0
class UpdateTest(unittest.TestCase):
    """Exercises BagIt.update() and validation on the bundled test bags."""

    def setUp(self):
        root = os.getcwd()
        self.bag = BagIt(os.path.join(root, "test", "testbag"))
        self.invalid_bag = BagIt(os.path.join(root, "test", "invalid_bag"))

    def tearDown(self):
        # Remove the invalid bag's directory so each test starts clean.
        invalid_dir = os.path.join(os.getcwd(), "test", "invalid_bag")
        if os.path.exists(invalid_dir):
            shutil.rmtree(invalid_dir)

    def test_full_update(self):
        # A full update of a well-formed bag records no errors.
        self.bag.update(full=True)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_partial_update(self):
        # A partial update likewise records no errors.
        self.bag.update(full=False)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_is_valid(self):
        # After an update the bag reports itself valid.
        self.bag.update()
        self.assertEqual(self.bag.is_valid(), True)

    def test_not_valid(self):
        # Deleting the manifest must make validation fail.
        os.remove(self.invalid_bag.manifest_file)
        self.invalid_bag.validate()
        self.assertEqual(self.invalid_bag.is_valid(), False)
Beispiel #3
0
class UpdateTest(unittest.TestCase):
    """Tests BagIt.update() and validation against the bundled test bags."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
        self.invalid_bag = BagIt(os.path.join(os.getcwd(), 'test', 'invalid_bag'))

    def tearDown(self):
        # Clean up the invalid bag's directory between tests.
        if os.path.exists(os.path.join(os.getcwd(), 'test', 'invalid_bag')):
            shutil.rmtree(os.path.join(os.getcwd(), 'test', 'invalid_bag'))

    def test_full_update(self):
        # A full update of a well-formed bag records no errors.
        # Fix: assertEquals is a deprecated alias (removed in Python 3.12).
        self.bag.update(full=True)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_partial_update(self):
        self.bag.update(full=False)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_is_valid(self):
        # After an update the bag reports itself valid.
        self.bag.update()
        self.assertEqual(self.bag.is_valid(), True)

    def test_not_valid(self):
        # Deleting the manifest must make validation fail.
        os.remove(self.invalid_bag.manifest_file)
        self.invalid_bag.validate()
        self.assertEqual(self.invalid_bag.is_valid(), False)
Beispiel #4
0
class ManifestTest(unittest.TestCase):
    """Tests that BagIt writes the correct manifest for each hash encoding."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), "test", "testbag"))

    def set_hash_md5(self):
        # Helper (not auto-run: name lacks the ``test_`` prefix).
        self.bag.set_hash_encoding("md5")
        # Bug fix: the encoding lives on the bag, not the TestCase
        # (the original read ``self.hash_encoding``, which does not exist
        # here) -- TODO confirm the attribute name against BagIt.
        self.assertEqual(self.bag.hash_encoding, "md5")

    def set_hash_sha1(self):
        # Helper (not auto-run: name lacks the ``test_`` prefix).
        self.bag.set_hash_encoding("sha1")
        self.assertEqual(self.bag.hash_encoding, "sha1")

    def test_sha1(self):
        # The SHA-1 manifest records the expected digest for a known file.
        self.bag.set_hash_encoding("sha1")
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join("data", "subdir",
                                                    "subsubdir", "angry.jpg")],
            "c5913ae67aa40398f1182e52d2fa2c2e4c08f696",
        )

    def test_md5(self):
        # The MD5 manifest records the expected digest for a known file.
        self.bag.set_hash_encoding("md5")
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join("data", "subdir",
                                                    "subsubdir", "angry.jpg")],
            "5f294603675cb6c0f83cef9316bb5be7",
        )

    def test_sha1_manifest(self):
        # The manifest file is named after the chosen encoding.
        self.bag.set_hash_encoding("sha1")
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         "manifest-sha1.txt")

    def test_md5_manifest(self):
        self.bag.set_hash_encoding("md5")
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         "manifest-md5.txt")
Beispiel #5
0
class ManifestTest(unittest.TestCase):
    """Tests that BagIt writes the correct manifest for each hash encoding."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))

    def set_hash_md5(self):
        # Helper (not auto-run: name lacks the ``test_`` prefix).
        self.bag.set_hash_encoding('md5')
        # Bug fix: the encoding lives on the bag, not the TestCase
        # (the original read ``self.hash_encoding``, which does not exist
        # here) -- TODO confirm the attribute name against BagIt.
        self.assertEqual(self.bag.hash_encoding, 'md5')

    def set_hash_sha1(self):
        # Helper (not auto-run: name lacks the ``test_`` prefix).
        self.bag.set_hash_encoding('sha1')
        self.assertEqual(self.bag.hash_encoding, 'sha1')

    def test_sha1(self):
        # The SHA-1 manifest records the expected digest for a known file.
        # Fix: assertEquals is a deprecated alias (removed in Python 3.12).
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join('data', 'subdir',
                                                    'subsubdir', 'angry.jpg')],
            'c5913ae67aa40398f1182e52d2fa2c2e4c08f696')

    def test_md5(self):
        # The MD5 manifest records the expected digest for a known file.
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join('data', 'subdir',
                                                    'subsubdir', 'angry.jpg')],
            '5f294603675cb6c0f83cef9316bb5be7')

    def test_sha1_manifest(self):
        # The manifest file is named after the chosen encoding.
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         'manifest-sha1.txt')

    def test_md5_manifest(self):
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         'manifest-md5.txt')
Beispiel #6
0
    def run(self, rp_id):
        """Build a zipped BagIt package of a workflow run's outputs.

        rp_id -- uuid of the ResultsPackage row to build (also used as the
            name of the resulting zip archive).

        Returns True on success, False when the assembled bag fails
        validation.
        """
        rp_query = ResultsPackage.objects.filter(uuid=rp_id)
        rp_query.update(status=task_status.PROCESSING,
                        celery_task_id=self.request.id)
        rp = rp_query.first()
        mode = rp.packaging_mode
        package_path = get_package_path(rp_id)

        # Annotate each output as an "endpoint" when its resource (or
        # resource list) is never consumed as an input by another run job of
        # the same workflow run -- i.e. it is a final product.
        output_objs = (
            Output.objects.filter(
                run_job__workflow_run=rp.workflow_run).select_related(
                    "resource", "resource__resource_type", "resource_list",
                    "run_job").prefetch_related("resource_list__resources").
            annotate(is_endpoint=Case(
                When(
                    condition=(
                        Q(resource__isnull=False)
                        & (Q(resource__inputs__isnull=True)
                           | ~Q(resource__inputs__run_job__workflow_run=rp.
                                workflow_run)))
                    | (Q(resource_list__isnull=False)
                       & (Q(resource_list__inputs__isnull=True)
                          | ~Q(resource_list__inputs__run_job__workflow_run=rp.
                               workflow_run))),
                    then=Value(True),
                ),
                default=Value(False),
                output_field=BooleanField(),
            )))

        # Packaging accounts for 70% of the reported progress.
        if len(output_objs) > 0:
            percentage_increment = 70.00 / len(output_objs)
        else:
            percentage_increment = 0
        completed = 0.0

        with TemporaryDirectory() as td:
            tmp_dir = os.path.join(
                td, rp_id)  # because rp_id will be name of the packaged zip
            bag = BagIt(tmp_dir)

            job_namefinder = self._NameFinder()
            res_namefinder = self._NameFinder()

            for output in output_objs:
                if mode == 0:
                    # only endpoint resources, subdirectoried by different outputs
                    # continue if not endpoint output
                    if output.is_endpoint is False:
                        continue

                    j_name = job_namefinder.find(
                        output.run_job.workflow_job_id,
                        output.run_job.job_name)
                    opt_name = output.output_port_type_name
                    op_dir = os.path.join(tmp_dir,
                                          "{0} - {1}".format(j_name, opt_name))

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]

                            res_name = res_namefinder.find(
                                output.resource_id, output.resource.name
                            )  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_filename = "{0}{1}".format(res_name, ext)
                            if not os.path.exists(op_dir):
                                os.makedirs(op_dir)
                            shutil.copyfile(
                                filepath, os.path.join(op_dir,
                                                       result_filename))
                        elif output.resource_list is not None:
                            res_name = res_namefinder.find(
                                output.resource_list_id,
                                output.resource_list.name
                            )  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_foldername = "{0}.list".format(res_name)
                            result_folder = os.path.join(
                                op_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            # Copy the list members under zero-padded index
                            # names so lexical order matches list order.
                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(
                                    output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(
                                    str(idx).zfill(zfills), ext)
                                shutil.copyfile(
                                    filepath,
                                    os.path.join(result_folder, new_filename))

                elif mode == 1:
                    # All resources, subdirectoried by resource name.
                    res_name = res_namefinder.find(
                        output.resource_id, output.resource.name
                    )  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                    res_dir = os.path.join(tmp_dir, res_name)

                    j_name = job_namefinder.find(
                        output.run_job.workflow_job_id,
                        output.run_job.job_name)
                    opt_name = output.output_port_type_name

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            result_filename = "{0} - {1}{2}".format(
                                j_name, opt_name, ext)
                            if not os.path.exists(res_dir):
                                os.makedirs(res_dir)
                            shutil.copyfile(
                                filepath, os.path.join(res_dir,
                                                       result_filename))
                        elif output.resource_list is not None:
                            result_foldername = "{0} - {1}.list".format(
                                j_name, opt_name)
                            result_folder = os.path.join(
                                res_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(
                                    output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(
                                    str(idx).zfill(zfills), ext)
                                shutil.copyfile(
                                    filepath,
                                    os.path.join(result_folder, new_filename))

                    elif rj_status == task_status.FAILED:
                        # Record the failure details as a text file in place
                        # of the missing result.
                        result_filename = "{0} - {1} - ERROR.txt".format(
                            j_name, opt_name)
                        if not os.path.exists(res_dir):
                            os.makedirs(res_dir)
                        with open(os.path.join(res_dir, result_filename),
                                  "w") as f:
                            f.write("Error Summary: ")
                            f.write(output.run_job.error_summary)
                            f.write("\n\nError Details:\n")
                            f.write(output.run_job.error_details)
                elif mode == 2:
                    raise NotImplementedError()  # [TODO]
                else:
                    raise ValueError("mode {0} is not supported".format(mode))

                completed += percentage_increment
                rp_query.update(percent_completed=int(completed))

            bag.update()
            errors = bag.validate()
            # Bug fix: ``is_valid`` is a method; the original truth-tested the
            # bound method object, which is always truthy, so a failed
            # validation was silently ignored (and the FAILED status below was
            # then overwritten by FINISHED). Return early on failure.
            if not bag.is_valid():
                rp_query.update(
                    status=task_status.FAILED,
                    error_summary="The bag failed validation.",
                    error_details=str(errors),
                )
                return False

            target_dir_name = os.path.dirname(package_path)
            if not os.path.isdir(target_dir_name):
                os.makedirs(target_dir_name)
            bag.package(target_dir_name, method="zip")

        rp_query.update(status=task_status.FINISHED, percent_completed=100)
        # Schedule automatic expiry of the package if an expiry time is set.
        expiry_time = rp_query.values_list("expiry_time", flat=True)[0]
        if expiry_time:
            async_task = registry.tasks[
                "rodan.core.expire_package"].apply_async((rp_id, ),
                                                         eta=expiry_time,
                                                         queue="celery")
            expire_task_id = async_task.task_id
        else:
            expire_task_id = None

        rp_query.update(celery_task_id=expire_task_id)
        return True
Beispiel #7
0
    def run(self, package_id, *args, **kwargs):
        """Assemble a zipped BagIt results package for a workflow run.

        package_id -- primary key of the ResultsPackage to build.

        Raises BagNotValidError when the assembled bag fails validation.
        """
        resultspackage = ResultsPackage.objects.get(pk=package_id)
        if resultspackage.status == RunJobStatus.CANCELLED:
            return

        resultspackage.status = ResultsPackageStatus.PROCESSING
        resultspackage.save()

        runjobs = resultspackage.workflow_run.run_jobs.select_related(
            'page', 'job').all()

        # When no explicit pages were requested, package every page touched
        # by the workflow run's jobs.
        if not resultspackage.pages.exists():
            pages = set()
            for runjob in runjobs:
                pages.add(runjob.page)
        else:
            pages = resultspackage.pages.all()

        jobs = resultspackage.jobs.all()
        self.package_path = resultspackage.package_path

        # The chunks are intervals used to update the percent_completed field.
        if len(pages) > 0:
            page_chunk = 70.00 / len(pages)
        completed = 0.0

        bag = BagIt(resultspackage.bag_path)

        for page in pages:
            page_dir = os.path.join(bag.data_directory, page.name)
            os.makedirs(page_dir)
            page_runjobs = runjobs.filter(page=page)

            if not jobs:
                # If no jobs are provided, we will just make a list of jobs from the available runjobs.
                jobs = []
                if len(page_runjobs) > 0:
                    runjob_chunk = page_chunk / len(page_runjobs)

                for runjob in page_runjobs:
                    _add_result_to_bag(page_dir, runjob, bag)

                    completed += runjob_chunk
                    _ensure_db_state(resultspackage)
                    _update_progress(resultspackage, completed)

                    if runjob.workflow_job.job not in jobs:
                        jobs.append(runjob.workflow_job.job)

            else:
                if len(jobs) > 0:
                    job_chunk = page_chunk / len(jobs)

                # Typo fix: local was previously named "matcthing_runjobs".
                for job in jobs:
                    matching_runjobs = page_runjobs.filter(
                        workflow_job__job=job)
                    if len(matching_runjobs) > 0:
                        runjob_chunk = job_chunk / len(matching_runjobs)

                    for runjob in matching_runjobs:
                        _add_result_to_bag(page_dir, runjob, bag)

                        completed += runjob_chunk
                        _ensure_db_state(resultspackage)
                        _update_progress(resultspackage, completed)

        bag.update()
        errors = bag.validate()
        # Bug fix: ``is_valid`` is a method; the original truth-tested the
        # bound method object, which is always truthy, so validation failures
        # were never reported.
        if not bag.is_valid():
            _ensure_db_state(resultspackage)
            resultspackage.status = ResultsPackageStatus.FAILED
            resultspackage.save()
            raise BagNotValidError("The bag failed validation.\n" +
                                   str(errors))

        bag.package(resultspackage.package_path, method='zip')
        resultspackage.download_url = resultspackage.file_url
        resultspackage.percent_completed = 100
        resultspackage.status = ResultsPackageStatus.COMPLETE

        # If pages and jobs were not provided, we populate these fields now
        # since we have figured them out.
        resultspackage.pages = pages
        resultspackage.jobs = jobs

        _ensure_db_state(resultspackage)
        resultspackage.save()
        shutil.rmtree(resultspackage.bag_path)
Beispiel #8
0
    def run(self, rp_id):
        """Build a zipped BagIt package of a workflow run's outputs.

        rp_id -- uuid of the ResultsPackage row to build (also used as the
            name of the resulting zip archive).

        Returns True on success, False when the assembled bag fails
        validation.
        """
        rp_query = ResultsPackage.objects.filter(uuid=rp_id)
        rp_query.update(status=task_status.PROCESSING, celery_task_id=self.request.id)
        rp = rp_query.first()
        mode = rp.packaging_mode
        package_path = get_package_path(rp_id)

        # Annotate each output as an "endpoint" when its resource (or
        # resource list) is never consumed as an input by another run job of
        # the same workflow run -- i.e. it is a final product.
        output_objs = Output.objects.filter(
            run_job__workflow_run=rp.workflow_run
        ).select_related(
            'resource', 'resource__resource_type', 'resource_list', 'run_job'
        ).prefetch_related(
            'resource_list__resources'
        ).annotate(
            is_endpoint=Case(
                When(
                    condition=(
                        Q(resource__isnull=False)
                        & (
                            Q(resource__inputs__isnull=True)
                            | ~Q(resource__inputs__run_job__workflow_run=rp.workflow_run)
                        )
                    ) | (
                        Q(resource_list__isnull=False)
                        & (
                            Q(resource_list__inputs__isnull=True)
                            | ~Q(resource_list__inputs__run_job__workflow_run=rp.workflow_run)
                        )
                    ),
                    then=Value(True)
                ),
                default=Value(False),
                output_field=BooleanField()
            )
        )

        # Packaging accounts for 70% of the reported progress.
        if len(output_objs) > 0:
            percentage_increment = 70.00 / len(output_objs)
        else:
            percentage_increment = 0
        completed = 0.0

        with TemporaryDirectory() as td:
            tmp_dir = os.path.join(td, rp_id)  # because rp_id will be name of the packaged zip
            bag = BagIt(tmp_dir)

            job_namefinder = self._NameFinder()
            res_namefinder = self._NameFinder()

            for output in output_objs:
                if mode == 0:  # only endpoint resources, subdirectoried by different outputs
                    # continue if not endpoint output
                    if output.is_endpoint is False:
                        continue

                    j_name = job_namefinder.find(output.run_job.workflow_job_id, output.run_job.job_name)
                    opt_name = output.output_port_type_name
                    op_dir = os.path.join(tmp_dir, "{0} - {1}".format(j_name, opt_name))

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]

                            res_name = res_namefinder.find(output.resource_id, output.resource.name)  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_filename = "{0}{1}".format(res_name, ext)
                            if not os.path.exists(op_dir):
                                os.makedirs(op_dir)
                            shutil.copyfile(filepath, os.path.join(op_dir, result_filename))
                        elif output.resource_list is not None:
                            res_name = res_namefinder.find(output.resource_list_id, output.resource_list.name)  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_foldername = "{0}.list".format(res_name)
                            result_folder = os.path.join(op_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            # Copy the list members under zero-padded index
                            # names so lexical order matches list order.
                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(str(idx).zfill(zfills), ext)
                                shutil.copyfile(filepath, os.path.join(result_folder, new_filename))

                elif mode == 1:
                    # All resources, subdirectoried by resource name.
                    res_name = res_namefinder.find(output.resource_id, output.resource.name)  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                    res_dir = os.path.join(tmp_dir, res_name)

                    j_name = job_namefinder.find(output.run_job.workflow_job_id, output.run_job.job_name)
                    opt_name = output.output_port_type_name

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            result_filename = "{0} - {1}{2}".format(j_name, opt_name, ext)
                            if not os.path.exists(res_dir):
                                os.makedirs(res_dir)
                            shutil.copyfile(filepath, os.path.join(res_dir, result_filename))
                        elif output.resource_list is not None:
                            result_foldername = "{0} - {1}.list".format(j_name, opt_name)
                            result_folder = os.path.join(res_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(str(idx).zfill(zfills), ext)
                                shutil.copyfile(filepath, os.path.join(result_folder, new_filename))

                    elif rj_status == task_status.FAILED:
                        # Record the failure details as a text file in place
                        # of the missing result.
                        result_filename = "{0} - {1} - ERROR.txt".format(j_name, opt_name)
                        if not os.path.exists(res_dir):
                            os.makedirs(res_dir)
                        with open(os.path.join(res_dir, result_filename), 'w') as f:
                            f.write("Error Summary: ")
                            f.write(output.run_job.error_summary)
                            f.write("\n\nError Details:\n")
                            f.write(output.run_job.error_details)
                elif mode == 2:
                    raise NotImplementedError() # [TODO]
                else:
                    raise ValueError("mode {0} is not supported".format(mode))

                completed += percentage_increment
                rp_query.update(percent_completed=int(completed))

            bag.update()
            errors = bag.validate()
            # Bug fix: ``is_valid`` is a method; the original truth-tested the
            # bound method object, which is always truthy, so a failed
            # validation was silently ignored (and the FAILED status below was
            # then overwritten by FINISHED). Return early on failure.
            if not bag.is_valid():
                rp_query.update(status=task_status.FAILED,
                                error_summary="The bag failed validation.",
                                error_details=str(errors))
                return False

            target_dir_name = os.path.dirname(package_path)
            if not os.path.isdir(target_dir_name):
                os.makedirs(target_dir_name)
            bag.package(target_dir_name, method='zip')

        rp_query.update(status=task_status.FINISHED,
                        percent_completed=100)
        # Schedule automatic expiry of the package if an expiry time is set.
        expiry_time = rp_query.values_list('expiry_time', flat=True)[0]
        if expiry_time:
            async_task = registry.tasks['rodan.core.expire_package'].apply_async((rp_id, ), eta=expiry_time)
            expire_task_id = async_task.task_id
        else:
            expire_task_id = None

        rp_query.update(celery_task_id=expire_task_id)
        return True
Beispiel #9
0
    def run(self, package_id, *args, **kwargs):
        """Assemble a zipped BagIt results package for a workflow run.

        package_id -- primary key of the ResultsPackage to build.

        Raises BagNotValidError when the assembled bag fails validation.
        """
        resultspackage = ResultsPackage.objects.get(pk=package_id)
        if resultspackage.status == RunJobStatus.CANCELLED:
            return

        resultspackage.status = ResultsPackageStatus.PROCESSING
        resultspackage.save()

        runjobs = resultspackage.workflow_run.run_jobs.select_related('page', 'job').all()

        # When no explicit pages were requested, package every page touched
        # by the workflow run's jobs.
        if not resultspackage.pages.exists():
            pages = set()
            for runjob in runjobs:
                pages.add(runjob.page)
        else:
            pages = resultspackage.pages.all()

        jobs = resultspackage.jobs.all()
        self.package_path = resultspackage.package_path

        # The chunks are intervals used to update the percent_completed field.
        if len(pages) > 0:
            page_chunk = 70.00 / len(pages)
        completed = 0.0

        bag = BagIt(resultspackage.bag_path)

        for page in pages:
            page_dir = os.path.join(bag.data_directory, page.name)
            os.makedirs(page_dir)
            page_runjobs = runjobs.filter(page=page)

            if not jobs:
                # If no jobs are provided, we will just make a list of jobs from the available runjobs.
                jobs = []
                if len(page_runjobs) > 0:
                    runjob_chunk = page_chunk / len(page_runjobs)

                for runjob in page_runjobs:
                    _add_result_to_bag(page_dir, runjob, bag)

                    completed += runjob_chunk
                    _ensure_db_state(resultspackage)
                    _update_progress(resultspackage, completed)

                    if runjob.workflow_job.job not in jobs:
                        jobs.append(runjob.workflow_job.job)

            else:
                if len(jobs) > 0:
                    job_chunk = page_chunk / len(jobs)

                # Typo fix: local was previously named "matcthing_runjobs".
                for job in jobs:
                    matching_runjobs = page_runjobs.filter(workflow_job__job=job)
                    if len(matching_runjobs) > 0:
                        runjob_chunk = job_chunk / len(matching_runjobs)

                    for runjob in matching_runjobs:
                        _add_result_to_bag(page_dir, runjob, bag)

                        completed += runjob_chunk
                        _ensure_db_state(resultspackage)
                        _update_progress(resultspackage, completed)

        bag.update()
        errors = bag.validate()
        # Bug fix: ``is_valid`` is a method; the original truth-tested the
        # bound method object, which is always truthy, so validation failures
        # were never reported.
        if not bag.is_valid():
            _ensure_db_state(resultspackage)
            resultspackage.status = ResultsPackageStatus.FAILED
            resultspackage.save()
            raise BagNotValidError("The bag failed validation.\n" + str(errors))

        bag.package(resultspackage.package_path, method='zip')
        resultspackage.download_url = resultspackage.file_url
        resultspackage.percent_completed = 100
        resultspackage.status = ResultsPackageStatus.COMPLETE

        # If pages and jobs were not provided, we populate these fields now
        # since we have figured them out.
        resultspackage.pages = pages
        resultspackage.jobs = jobs

        _ensure_db_state(resultspackage)
        resultspackage.save()
        shutil.rmtree(resultspackage.bag_path)