class UpdateTest(unittest.TestCase):
    """Tests for BagIt.update() and validity reporting."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
        self.invalid_bag = BagIt(os.path.join(os.getcwd(), 'test', 'invalid_bag'))

    def tearDown(self):
        # Remove the scratch invalid bag so every test starts clean.
        if os.path.exists(os.path.join(os.getcwd(), 'test', 'invalid_bag')):
            shutil.rmtree(os.path.join(os.getcwd(), 'test', 'invalid_bag'))

    def test_full_update(self):
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
        self.bag.update(full=True)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_partial_update(self):
        self.bag.update(full=False)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_is_valid(self):
        self.bag.update()
        self.assertEqual(self.bag.is_valid(), True)

    def test_not_valid(self):
        # Deleting the manifest must make the bag fail validation.
        os.remove(self.invalid_bag.manifest_file)
        self.invalid_bag.validate()
        self.assertEqual(self.invalid_bag.is_valid(), False)
class UpdateTest(unittest.TestCase):
    """Exercises BagIt update/validate behaviour on a known test bag."""

    def _test_path(self, name):
        # Path of an entry under the cwd 'test' directory.
        return os.path.join(os.getcwd(), "test", name)

    def setUp(self):
        self.bag = BagIt(self._test_path("testbag"))
        self.invalid_bag = BagIt(self._test_path("invalid_bag"))

    def tearDown(self):
        # The invalid bag is scratch data; discard it after each test.
        invalid_dir = self._test_path("invalid_bag")
        if os.path.exists(invalid_dir):
            shutil.rmtree(invalid_dir)

    def test_full_update(self):
        self.bag.update(full=True)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_partial_update(self):
        self.bag.update(full=False)
        self.assertEqual(len(self.bag.bag_errors), 0)

    def test_is_valid(self):
        self.bag.update()
        self.assertEqual(self.bag.is_valid(), True)

    def test_not_valid(self):
        # Removing the manifest must render the bag invalid.
        os.remove(self.invalid_bag.manifest_file)
        self.invalid_bag.validate()
        self.assertEqual(self.invalid_bag.is_valid(), False)
def test_uncompress_tgz(self):
    """Round trip: package a bag as .tgz, then reopen it from the archive."""
    # Create an empty tgz bag.
    newbag = BagIt(os.path.join(os.getcwd(), 'test', 'newtgzbag'))
    newbag.package(os.path.join(os.getcwd(), 'test'))
    # Remove the created bag directory; only newtgzbag.tgz remains.
    shutil.rmtree(os.path.join(os.getcwd(), 'test', 'newtgzbag'))
    # Opening the archive should uncompress it back into a bag directory.
    tgzbag = BagIt(os.path.join(os.getcwd(), 'test', 'newtgzbag.tgz'))
    self.assertTrue(os.path.exists(tgzbag.bag_directory))
def test_uncompress_zip(self):
    """Round trip: package a bag as .zip, then reopen it from the archive."""
    # create an empty zip bag.
    newbag = BagIt(os.path.join(os.getcwd(), "test", "newzipbag"))
    newbag.package(os.path.join(os.getcwd(), "test"), method="zip")
    # remove the created bag directory
    shutil.rmtree(os.path.join(os.getcwd(), "test", "newzipbag"))
    # this should leave us with just newzipbag.zip
    zipbag = BagIt(os.path.join(os.getcwd(), "test", "newzipbag.zip"))
    self.assertTrue(os.path.exists(zipbag.bag_directory))
def test_uncompress_zip(self):
    """Round trip: package a bag as .zip, then reopen it from the archive."""
    # create an empty zip bag.
    newbag = BagIt(os.path.join(os.getcwd(), 'test', 'newzipbag'))
    newbag.package(os.path.join(os.getcwd(), 'test'), method='zip')
    # remove the created bag directory
    shutil.rmtree(os.path.join(os.getcwd(), 'test', 'newzipbag'))
    # this should leave us with just newzipbag.zip
    zipbag = BagIt(os.path.join(os.getcwd(), 'test', 'newzipbag.zip'))
    self.assertTrue(os.path.exists(zipbag.bag_directory))
def setUp(self):
    """Open the shared test bag and record the expected fetch.txt entry."""
    bag_root = os.path.join(os.getcwd(), "test", "testbag")
    self.bag = BagIt(bag_root)
    expected_entry = {
        "filename": "data/bagitspec.pdf",
        "length": "-",
        "url": "http://www.digitalpreservation.gov/documents/bagitspec.pdf",
    }
    self.test_fetch_contents = [expected_entry]
def setUp(self):
    """Prepare the test bag plus the single fetch entry it should contain."""
    self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
    entry = {
        'filename': u'data/bagitspec.pdf',
        'length': u'-',
        'url': u'http://www.digitalpreservation.gov/documents/bagitspec.pdf',
    }
    self.test_fetch_contents = [entry]
class FetchTest(unittest.TestCase):
    """Network-dependent tests for BagIt.fetch().

    NOTE: these may fail if the remote sites change their files; if so,
    disable the affected test or update the expected SHA1 checksum.
    """

    def _data_file(self, name):
        # Path of a file inside the test bag's data/ directory.
        return os.path.join(os.getcwd(), "test", "testbag", "data", name)

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), "test", "testbag"))
        self.test_fetch_contents = [{
            "filename": "data/bagitspec.pdf",
            "length": "-",
            "url": "http://www.digitalpreservation.gov/documents/bagitspec.pdf",
        }]

    def tearDown(self):
        # Drop anything a fetch may have downloaded, then reset fetch.txt.
        for name in ("stealin_mah_bag.jpg", "bagitspec.pdf"):
            downloaded = self._data_file(name)
            if os.path.exists(downloaded):
                os.remove(downloaded)
        self.bag.add_fetch_entries(self.test_fetch_contents, append=False)

    def test_fetch_contents(self):
        self.assertEqual(self.bag.fetch_contents, self.test_fetch_contents)

    def test_can_fetch(self):
        self.bag.fetch()
        self.assertTrue(os.path.exists(self._data_file("bagitspec.pdf")))

    def test_can_fetch_and_validate(self):
        self.bag.fetch(validate_downloads=True)
        self.assertEqual(
            self.bag.manifest_contents["data/bagitspec.pdf"],
            "4649c6540ac4e4dcf271ca236abfe62faa4d7f08",
        )

    def set_fetch_contents(self):
        # NOTE(review): no "test_" prefix, so unittest never runs this method.
        self.bag.add_fetch_entries([{
            "url": "http://icanhascheezburger.files.wordpress.com/2007/06/stealing_my_bag.jpg",
            "filename": os.path.join("data", "stealin_mah_bag.jpg"),
        }])
        self.assertTrue(os.path.exists(self._data_file("stealin_mah_bag.jpg")))
class FetchTest(unittest.TestCase):
    """Network-dependent tests for BagIt.fetch().

    These may fail if the websites change their files. In that case,
    comment the test out or replace the assertion with a new SHA1 checksum.
    """

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
        self.test_fetch_contents = [{
            'filename': u'data/bagitspec.pdf',
            'length': u'-',
            'url': u'http://www.digitalpreservation.gov/documents/bagitspec.pdf',
        }]

    def tearDown(self):
        # Remove any downloaded files, then restore the original fetch.txt.
        if os.path.exists(os.path.join(os.getcwd(), 'test', 'testbag', 'data',
                                       'stealin_mah_bag.jpg')):
            os.remove(os.path.join(os.getcwd(), 'test', 'testbag', 'data',
                                   'stealin_mah_bag.jpg'))
        if os.path.exists(os.path.join(os.getcwd(), 'test', 'testbag', 'data',
                                       'bagitspec.pdf')):
            os.remove(os.path.join(os.getcwd(), 'test', 'testbag', 'data',
                                   'bagitspec.pdf'))
        self.bag.add_fetch_entries(self.test_fetch_contents, append=False)

    def test_fetch_contents(self):
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(self.bag.fetch_contents, self.test_fetch_contents)

    def test_can_fetch(self):
        self.bag.fetch()
        self.assertTrue(os.path.exists(
            os.path.join(os.getcwd(), 'test', 'testbag', 'data', 'bagitspec.pdf')))

    def test_can_fetch_and_validate(self):
        self.bag.fetch(validate_downloads=True)
        self.assertEqual(self.bag.manifest_contents['data/bagitspec.pdf'],
                         '4649c6540ac4e4dcf271ca236abfe62faa4d7f08')

    def set_fetch_contents(self):
        # NOTE(review): no "test_" prefix, so unittest never runs this method.
        self.bag.add_fetch_entries([{
            'url': 'http://icanhascheezburger.files.wordpress.com/2007/06/stealing_my_bag.jpg',
            'filename': os.path.join('data', 'stealin_mah_bag.jpg'),
        }])
        self.assertTrue(os.path.exists(
            os.path.join(os.getcwd(), 'test', 'testbag', 'data', 'stealin_mah_bag.jpg')))
class CompressTest(unittest.TestCase):
    """Packaging (.tgz / .zip) and unpackaging round-trip tests."""

    def _p(self, name):
        # Path of an entry under the cwd 'test' directory.
        return os.path.join(os.getcwd(), "test", name)

    def setUp(self):
        self.bag = BagIt(self._p("testbag"))

    def tearDown(self):
        # Clean up every archive or bag directory a test may have created.
        for filename in ("testbag.tgz", "testbag.zip", "newzipbag.zip", "newtgzbag.tgz"):
            if os.path.exists(self._p(filename)):
                os.remove(self._p(filename))
        for dirname in ("newzipbag", "newtgzbag"):
            if os.path.exists(self._p(dirname)):
                shutil.rmtree(self._p(dirname))

    def test_compress_tgz(self):
        self.bag.package(os.path.join(os.getcwd(), "test"))
        self.assertTrue(os.path.exists(self._p("testbag.tgz")))

    def test_compress_zip(self):
        self.bag.package(os.path.join(os.getcwd(), "test"), method="zip")
        self.assertTrue(os.path.exists(self._p("testbag.zip")))

    def test_uncompress_tgz(self):
        # Package an empty bag, delete the directory, reopen from the .tgz.
        newbag = BagIt(self._p("newtgzbag"))
        newbag.package(os.path.join(os.getcwd(), "test"))
        shutil.rmtree(self._p("newtgzbag"))
        tgzbag = BagIt(self._p("newtgzbag.tgz"))
        self.assertTrue(os.path.exists(tgzbag.bag_directory))

    def test_uncompress_zip(self):
        # Same round trip through a .zip archive.
        newbag = BagIt(self._p("newzipbag"))
        newbag.package(os.path.join(os.getcwd(), "test"), method="zip")
        shutil.rmtree(self._p("newzipbag"))
        zipbag = BagIt(self._p("newzipbag.zip"))
        self.assertTrue(os.path.exists(zipbag.bag_directory))
def test_extended_bag_creation(self):
    """A newly created bag contains all the extended tag files."""
    root = os.path.join(os.getcwd(), "test", "newtestbag")
    newbag = BagIt(root)
    # Creation must produce the bag directory plus every standard file.
    expected_paths = (
        root,
        os.path.join(root, "bagit.txt"),
        os.path.join(root, "manifest-sha1.txt"),
        os.path.join(root, "data"),
        os.path.join(root, "bag-info.txt"),
        os.path.join(root, "fetch.txt"),
        os.path.join(root, "tagmanifest-sha1.txt"),
    )
    for path in expected_paths:
        self.assertTrue(os.path.exists(path))
def test_extended_bag_creation(self):
    """Creating a bag lays down the full extended BagIt file set."""
    bag_dir = os.path.join(os.getcwd(), 'test', 'newtestbag')
    newbag = BagIt(bag_dir)
    self.assertTrue(os.path.exists(bag_dir))
    # Each extended tag file must exist inside the new bag.
    for entry in ('bagit.txt', 'manifest-sha1.txt', 'data', 'bag-info.txt',
                  'fetch.txt', 'tagmanifest-sha1.txt'):
        self.assertTrue(os.path.exists(os.path.join(bag_dir, entry)))
class CompressTest(unittest.TestCase):
    """Round-trip packaging tests: archive creation and extraction."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))

    def tearDown(self):
        base = os.path.join(os.getcwd(), 'test')
        # Remove every archive or directory a test may have left behind.
        for leftover_file in ('testbag.tgz', 'testbag.zip',
                              'newzipbag.zip', 'newtgzbag.tgz'):
            target = os.path.join(base, leftover_file)
            if os.path.exists(target):
                os.remove(target)
        for leftover_dir in ('newzipbag', 'newtgzbag'):
            target = os.path.join(base, leftover_dir)
            if os.path.exists(target):
                shutil.rmtree(target)

    def test_compress_tgz(self):
        self.bag.package(os.path.join(os.getcwd(), 'test'))
        self.assertTrue(
            os.path.exists(os.path.join(os.getcwd(), 'test', 'testbag.tgz')))

    def test_compress_zip(self):
        self.bag.package(os.path.join(os.getcwd(), 'test'), method='zip')
        self.assertTrue(
            os.path.exists(os.path.join(os.getcwd(), 'test', 'testbag.zip')))

    def test_uncompress_tgz(self):
        # Package an empty bag, drop its directory, then reopen the .tgz.
        newbag = BagIt(os.path.join(os.getcwd(), 'test', 'newtgzbag'))
        newbag.package(os.path.join(os.getcwd(), 'test'))
        shutil.rmtree(os.path.join(os.getcwd(), 'test', 'newtgzbag'))
        tgzbag = BagIt(os.path.join(os.getcwd(), 'test', 'newtgzbag.tgz'))
        self.assertTrue(os.path.exists(tgzbag.bag_directory))

    def test_uncompress_zip(self):
        # Same round trip through a .zip archive.
        newbag = BagIt(os.path.join(os.getcwd(), 'test', 'newzipbag'))
        newbag.package(os.path.join(os.getcwd(), 'test'), method='zip')
        shutil.rmtree(os.path.join(os.getcwd(), 'test', 'newzipbag'))
        zipbag = BagIt(os.path.join(os.getcwd(), 'test', 'newzipbag.zip'))
        self.assertTrue(os.path.exists(zipbag.bag_directory))
class VersionTest(unittest.TestCase):
    """Checks the BagIt version numbers and bag-info metadata."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))

    def tearDown(self):
        pass

    def test_versions(self):
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(self.bag.bag_major_version, 0)
        self.assertEqual(self.bag.bag_minor_version, 96)
        binfo = self.bag.get_bag_info()
        self.assertEqual(binfo['version'], '0.96')
        self.assertEqual(binfo['encoding'], 'utf-8')
        self.assertEqual(binfo['hash'], 'sha1')
class VersionTest(unittest.TestCase):
    """Verifies bag version fields and the bag-info dictionary."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), "test", "testbag"))

    def tearDown(self):
        pass

    def test_versions(self):
        # Version attributes on the bag object itself.
        self.assertEqual(self.bag.bag_major_version, 0)
        self.assertEqual(self.bag.bag_minor_version, 96)
        # ...and the same data as reported by bag-info.
        info = self.bag.get_bag_info()
        self.assertEqual(info["version"], "0.96")
        self.assertEqual(info["encoding"], "utf-8")
        self.assertEqual(info["hash"], "sha1")
def run(self, package_id, *args, **kwargs):
    """Build a BagIt results package for a workflow run and zip it.

    Collects each page's run-job results into the bag, validates the bag,
    packages it as a zip, then marks the ResultsPackage COMPLETE. Progress
    is reported via percent_completed (the bag-building phase covers 70%).

    Raises:
        BagNotValidError: if the assembled bag fails validation.
    """
    resultspackage = ResultsPackage.objects.get(pk=package_id)
    if resultspackage.status == RunJobStatus.CANCELLED:
        return
    resultspackage.status = ResultsPackageStatus.PROCESSING
    resultspackage.save()

    runjobs = resultspackage.workflow_run.run_jobs.select_related(
        'page', 'job').all()
    if not resultspackage.pages.exists():
        pages = set()
        for runjob in runjobs:
            pages.add(runjob.page)
    else:
        pages = resultspackage.pages.all()

    jobs = resultspackage.jobs.all()
    self.package_path = resultspackage.package_path

    # The chunks are intervals used to update the percent_completed field.
    if len(pages) > 0:
        page_chunk = 70.00 / len(pages)
    completed = 0.0

    bag = BagIt(resultspackage.bag_path)
    for page in pages:
        page_dir = os.path.join(bag.data_directory, page.name)
        os.makedirs(page_dir)
        page_runjobs = runjobs.filter(page=page)
        if not jobs:
            # If no jobs are provided, build the job list from the runjobs.
            jobs = []
            if len(page_runjobs) > 0:
                runjob_chunk = page_chunk / len(page_runjobs)
                for runjob in page_runjobs:
                    _add_result_to_bag(page_dir, runjob, bag)
                    completed += runjob_chunk
                    _ensure_db_state(resultspackage)
                    _update_progress(resultspackage, completed)
                    if runjob.workflow_job.job not in jobs:
                        jobs.append(runjob.workflow_job.job)
        else:
            if len(jobs) > 0:
                job_chunk = page_chunk / len(jobs)
                for job in jobs:
                    # Typo fix: was "matcthing_runjobs".
                    matching_runjobs = page_runjobs.filter(
                        workflow_job__job=job)
                    if len(matching_runjobs) > 0:
                        runjob_chunk = job_chunk / len(matching_runjobs)
                        for runjob in matching_runjobs:
                            _add_result_to_bag(page_dir, runjob, bag)
                            completed += runjob_chunk
                            _ensure_db_state(resultspackage)
                            _update_progress(resultspackage, completed)

    bag.update()
    errors = bag.validate()
    # BUG FIX: is_valid is a method (tests call bag.is_valid()); the bare
    # attribute reference was always truthy, so this failure branch could
    # never fire and invalid bags were packaged anyway.
    if not bag.is_valid():
        _ensure_db_state(resultspackage)
        resultspackage.status = ResultsPackageStatus.FAILED
        resultspackage.save()
        raise BagNotValidError("The bag failed validation.\n" + str(errors))

    bag.package(resultspackage.package_path, method='zip')
    resultspackage.download_url = resultspackage.file_url
    resultspackage.percent_completed = 100
    resultspackage.status = ResultsPackageStatus.COMPLETE
    # If pages and jobs were not provided, we populate these fields now
    # since we have figured them out.
    resultspackage.pages = pages
    resultspackage.jobs = jobs
    _ensure_db_state(resultspackage)
    resultspackage.save()
    shutil.rmtree(resultspackage.bag_path)
def test_unicode_characters_in_bagnam(self):
    """A bag whose name contains non-ASCII characters can be created."""
    # NOTE(review): "bagnam" in the method name looks like a typo for
    # "bagname"; kept as-is to avoid changing the public test name.
    bag_path = os.path.join(os.getcwd(), 'test', 'tëst')
    newbag = BagIt(bag_path)
    self.assertTrue(os.path.exists(bag_path))
def setUp(self):
    """Open the test bag and define the fetch entry it should list."""
    self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
    self.test_fetch_contents = [
        {
            'filename': u'data/bagitspec.pdf',
            'length': u'-',
            'url': u'http://www.digitalpreservation.gov/documents/bagitspec.pdf',
        }
    ]
class ManifestTest(unittest.TestCase):
    """Manifest generation under the sha1 and md5 hash encodings."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))

    def set_hash_md5(self):
        # NOTE(review): lacks the "test_" prefix, so unittest never runs it.
        self.bag.set_hash_encoding('md5')
        # BUG FIX: hash_encoding lives on the bag, not on the test case.
        self.assertEqual(self.bag.hash_encoding, u'md5')

    def set_hash_sha1(self):
        self.bag.set_hash_encoding('sha1')
        self.assertEqual(self.bag.hash_encoding, u'sha1')

    def test_sha1(self):
        # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[
                os.path.join('data', 'subdir', 'subsubdir', 'angry.jpg')],
            u'c5913ae67aa40398f1182e52d2fa2c2e4c08f696')

    def test_md5(self):
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[
                os.path.join('data', 'subdir', 'subsubdir', 'angry.jpg')],
            '5f294603675cb6c0f83cef9316bb5be7')

    def test_sha1_manifest(self):
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         'manifest-sha1.txt')

    def test_md5_manifest(self):
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         'manifest-md5.txt')
def setUp(self):
    """Announce setup and open the shared test bag."""
    # BUG FIX: Python-2-only print statement; the function-call form
    # behaves identically on both Python 2 and 3 for a single argument.
    print("Setting up version.")
    self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
class ManifestTest(unittest.TestCase):
    """Tests of manifest files produced with sha1 and md5 encodings."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))

    def set_hash_md5(self):
        # NOTE(review): missing "test_" prefix -- never collected by unittest.
        self.bag.set_hash_encoding('md5')
        # BUG FIX: was self.hash_encoding (an AttributeError on the TestCase);
        # the encoding is an attribute of the bag.
        self.assertEqual(self.bag.hash_encoding, u'md5')

    def set_hash_sha1(self):
        self.bag.set_hash_encoding('sha1')
        self.assertEqual(self.bag.hash_encoding, u'sha1')

    def test_sha1(self):
        # assertEqual replaces the deprecated assertEquals alias.
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join('data', 'subdir',
                                                    'subsubdir', 'angry.jpg')],
            u'c5913ae67aa40398f1182e52d2fa2c2e4c08f696')

    def test_md5(self):
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join('data', 'subdir',
                                                    'subsubdir', 'angry.jpg')],
            '5f294603675cb6c0f83cef9316bb5be7')

    def test_sha1_manifest(self):
        self.bag.set_hash_encoding('sha1')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         'manifest-sha1.txt')

    def test_md5_manifest(self):
        self.bag.set_hash_encoding('md5')
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         'manifest-md5.txt')
def run(self, rp_id):
    """Collect the outputs of a workflow run into a BagIt zip package.

    Packaging modes:
      0 -- only endpoint resources, grouped into per-output directories.
      1 -- all resources, grouped into per-resource directories.
      2 -- reserved (not implemented).

    Returns True on success, False if the assembled bag fails validation.
    """
    rp_query = ResultsPackage.objects.filter(uuid=rp_id)
    rp_query.update(status=task_status.PROCESSING, celery_task_id=self.request.id)
    rp = rp_query.first()
    mode = rp.packaging_mode
    package_path = get_package_path(rp_id)

    # Annotate each output with whether it is an "endpoint": its resource
    # (or resource list) is not consumed by any run job of this workflow run.
    output_objs = Output.objects.filter(
        run_job__workflow_run=rp.workflow_run
    ).select_related(
        'resource', 'resource__resource_type', 'resource_list', 'run_job'
    ).prefetch_related(
        'resource_list__resources'
    ).annotate(
        is_endpoint=Case(
            When(
                condition=(
                    Q(resource__isnull=False)
                    & (
                        Q(resource__inputs__isnull=True)
                        | ~Q(resource__inputs__run_job__workflow_run=rp.workflow_run)
                    )
                ) | (
                    Q(resource_list__isnull=False)
                    & (
                        Q(resource_list__inputs__isnull=True)
                        | ~Q(resource_list__inputs__run_job__workflow_run=rp.workflow_run)
                    )
                ),
                then=Value(True)
            ),
            default=Value(False),
            output_field=BooleanField()
        )
    )

    # Progress: the copy phase accounts for up to 70%.
    if len(output_objs) > 0:
        percentage_increment = 70.00 / len(output_objs)
    else:
        percentage_increment = 0
    completed = 0.0

    with TemporaryDirectory() as td:
        # rp_id becomes the name of the packaged zip.
        tmp_dir = os.path.join(td, rp_id)
        bag = BagIt(tmp_dir)
        job_namefinder = self._NameFinder()
        res_namefinder = self._NameFinder()

        for output in output_objs:
            if mode == 0:
                # Only endpoint resources, subdirectoried by different outputs.
                if output.is_endpoint is False:
                    continue
                j_name = job_namefinder.find(output.run_job.workflow_job_id,
                                             output.run_job.job_name)
                opt_name = output.output_port_type_name
                op_dir = os.path.join(tmp_dir, "{0} - {1}".format(j_name, opt_name))
                rj_status = output.run_job.status
                if rj_status == task_status.FINISHED:
                    if output.resource is not None:
                        filepath = output.resource.resource_file.path
                        ext = os.path.splitext(filepath)[1]
                        # [TODO]: or... find the modified resource name if the
                        # resource_uuid still exists?
                        res_name = res_namefinder.find(output.resource_id,
                                                       output.resource.name)
                        result_filename = "{0}{1}".format(res_name, ext)
                        if not os.path.exists(op_dir):
                            os.makedirs(op_dir)
                        shutil.copyfile(filepath,
                                        os.path.join(op_dir, result_filename))
                    elif output.resource_list is not None:
                        res_name = res_namefinder.find(output.resource_list_id,
                                                       output.resource_list.name)
                        result_foldername = "{0}.list".format(res_name)
                        result_folder = os.path.join(op_dir, result_foldername)
                        if not os.path.exists(result_folder):
                            os.makedirs(result_folder)
                        # Zero-pad the index so files sort in resource order.
                        cnt = output.resource_list.resources.count()
                        zfills = len(str(cnt))
                        for idx, r in enumerate(output.resource_list.resources.all()):
                            filepath = r.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            new_filename = "{0}{1}".format(str(idx).zfill(zfills), ext)
                            shutil.copyfile(filepath,
                                            os.path.join(result_folder, new_filename))
            elif mode == 1:
                # All resources, one directory per resource.
                # NOTE(review): assumes output.resource is not None here;
                # a resource-list-only output would raise AttributeError.
                res_name = res_namefinder.find(output.resource_id,
                                               output.resource.name)
                res_dir = os.path.join(tmp_dir, res_name)
                j_name = job_namefinder.find(output.run_job.workflow_job_id,
                                             output.run_job.job_name)
                opt_name = output.output_port_type_name
                rj_status = output.run_job.status
                if rj_status == task_status.FINISHED:
                    if output.resource is not None:
                        filepath = output.resource.resource_file.path
                        ext = os.path.splitext(filepath)[1]
                        result_filename = "{0} - {1}{2}".format(j_name, opt_name, ext)
                        if not os.path.exists(res_dir):
                            os.makedirs(res_dir)
                        shutil.copyfile(filepath,
                                        os.path.join(res_dir, result_filename))
                    elif output.resource_list is not None:
                        result_foldername = "{0} - {1}.list".format(j_name, opt_name)
                        result_folder = os.path.join(res_dir, result_foldername)
                        if not os.path.exists(result_folder):
                            os.makedirs(result_folder)
                        cnt = output.resource_list.resources.count()
                        zfills = len(str(cnt))
                        for idx, r in enumerate(output.resource_list.resources.all()):
                            filepath = r.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            new_filename = "{0}{1}".format(str(idx).zfill(zfills), ext)
                            shutil.copyfile(filepath,
                                            os.path.join(result_folder, new_filename))
                elif rj_status == task_status.FAILED:
                    # Record the failure as a text file in the package.
                    result_filename = "{0} - {1} - ERROR.txt".format(j_name, opt_name)
                    if not os.path.exists(res_dir):
                        os.makedirs(res_dir)
                    with open(os.path.join(res_dir, result_filename), 'w') as f:
                        f.write("Error Summary: ")
                        f.write(output.run_job.error_summary)
                        f.write("\n\nError Details:\n")
                        f.write(output.run_job.error_details)
            elif mode == 2:
                raise NotImplementedError()  # [TODO]
            else:
                raise ValueError("mode {0} is not supported".format(mode))

            completed += percentage_increment
            rp_query.update(percent_completed=int(completed))

        bag.update()
        errors = bag.validate()
        # BUG FIX: is_valid is a method (tests call bag.is_valid()); the bare
        # attribute was always truthy, so this branch could never fire.
        if not bag.is_valid():
            rp_query.update(status=task_status.FAILED,
                            error_summary="The bag failed validation.",
                            error_details=str(errors))
            # BUG FIX: without this return the task fell through and
            # overwrote FAILED with FINISHED below.
            return False

        target_dir_name = os.path.dirname(package_path)
        if not os.path.isdir(target_dir_name):
            os.makedirs(target_dir_name)
        bag.package(target_dir_name, method='zip')
        rp_query.update(status=task_status.FINISHED, percent_completed=100)

        # Schedule expiry of the package if an expiry time was requested.
        expiry_time = rp_query.values_list('expiry_time', flat=True)[0]
        if expiry_time:
            async_task = registry.tasks['rodan.core.expire_package'].apply_async(
                (rp_id, ), eta=expiry_time)
            expire_task_id = async_task.task_id
        else:
            expire_task_id = None
        rp_query.update(celery_task_id=expire_task_id)
        return True
def run(self, package_id, *args, **kwargs):
    """Assemble a BagIt results package for a workflow run and zip it.

    Copies each page's run-job results into the bag (70% of the progress
    budget), validates, zips, then marks the ResultsPackage COMPLETE.

    Raises:
        BagNotValidError: if the assembled bag fails validation.
    """
    resultspackage = ResultsPackage.objects.get(pk=package_id)
    if resultspackage.status == RunJobStatus.CANCELLED:
        return
    resultspackage.status = ResultsPackageStatus.PROCESSING
    resultspackage.save()

    runjobs = resultspackage.workflow_run.run_jobs.select_related('page', 'job').all()
    if not resultspackage.pages.exists():
        pages = set()
        for runjob in runjobs:
            pages.add(runjob.page)
    else:
        pages = resultspackage.pages.all()

    jobs = resultspackage.jobs.all()
    self.package_path = resultspackage.package_path

    # The chunks are intervals used to update the percent_completed field.
    if len(pages) > 0:
        page_chunk = 70.00 / len(pages)
    completed = 0.0

    bag = BagIt(resultspackage.bag_path)
    for page in pages:
        page_dir = os.path.join(bag.data_directory, page.name)
        os.makedirs(page_dir)
        page_runjobs = runjobs.filter(page=page)
        if not jobs:
            # If no jobs are provided, we will just make a list of jobs
            # from the available runjobs.
            jobs = []
            if len(page_runjobs) > 0:
                runjob_chunk = page_chunk / len(page_runjobs)
                for runjob in page_runjobs:
                    _add_result_to_bag(page_dir, runjob, bag)
                    completed += runjob_chunk
                    _ensure_db_state(resultspackage)
                    _update_progress(resultspackage, completed)
                    if runjob.workflow_job.job not in jobs:
                        jobs.append(runjob.workflow_job.job)
        else:
            if len(jobs) > 0:
                job_chunk = page_chunk / len(jobs)
                for job in jobs:
                    # Typo fix: was "matcthing_runjobs".
                    matching_runjobs = page_runjobs.filter(workflow_job__job=job)
                    if len(matching_runjobs) > 0:
                        runjob_chunk = job_chunk / len(matching_runjobs)
                        for runjob in matching_runjobs:
                            _add_result_to_bag(page_dir, runjob, bag)
                            completed += runjob_chunk
                            _ensure_db_state(resultspackage)
                            _update_progress(resultspackage, completed)

    bag.update()
    errors = bag.validate()
    # BUG FIX: is_valid is a method (tests call bag.is_valid()); referencing
    # the bound method was always truthy, so invalid bags were never caught.
    if not bag.is_valid():
        _ensure_db_state(resultspackage)
        resultspackage.status = ResultsPackageStatus.FAILED
        resultspackage.save()
        raise BagNotValidError("The bag failed validation.\n" + str(errors))

    bag.package(resultspackage.package_path, method='zip')
    resultspackage.download_url = resultspackage.file_url
    resultspackage.percent_completed = 100
    resultspackage.status = ResultsPackageStatus.COMPLETE
    # If pages and jobs were not provided, we populate these fields now
    # since we have figured them out.
    resultspackage.pages = pages
    resultspackage.jobs = jobs
    _ensure_db_state(resultspackage)
    resultspackage.save()
    shutil.rmtree(resultspackage.bag_path)
def setUp(self):
    """Announce setup and open both the valid and the invalid test bags."""
    # BUG FIX: Python-2-only print statement; the function-call form
    # behaves identically on both Python 2 and 3 for a single argument.
    print("Setting up update.")
    self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))
    self.invalid_bag = BagIt(os.path.join(os.getcwd(), 'test', 'invalid_bag'))
def run(self, rp_id):
    """Collect the outputs of a workflow run into a BagIt zip package.

    Packaging modes:
      0 -- only endpoint resources, grouped into per-output directories.
      1 -- all resources, grouped into per-resource directories.
      2 -- reserved (not implemented).

    Returns True on success, False if the assembled bag fails validation.
    """
    rp_query = ResultsPackage.objects.filter(uuid=rp_id)
    rp_query.update(status=task_status.PROCESSING,
                    celery_task_id=self.request.id)
    rp = rp_query.first()
    mode = rp.packaging_mode
    package_path = get_package_path(rp_id)

    # Annotate each output with whether it is an "endpoint": its resource
    # (or resource list) is not consumed by any run job of this workflow run.
    output_objs = (
        Output.objects.filter(
            run_job__workflow_run=rp.workflow_run).select_related(
                "resource", "resource__resource_type", "resource_list",
                "run_job").prefetch_related("resource_list__resources").
        annotate(is_endpoint=Case(
            When(
                condition=(
                    Q(resource__isnull=False)
                    & (Q(resource__inputs__isnull=True)
                       | ~Q(resource__inputs__run_job__workflow_run=rp.
                            workflow_run)))
                | (Q(resource_list__isnull=False)
                   & (Q(resource_list__inputs__isnull=True)
                      | ~Q(resource_list__inputs__run_job__workflow_run=rp.
                           workflow_run))),
                then=Value(True),
            ),
            default=Value(False),
            output_field=BooleanField(),
        )))

    # Progress: the copy phase accounts for up to 70%.
    if len(output_objs) > 0:
        percentage_increment = 70.00 / len(output_objs)
    else:
        percentage_increment = 0
    completed = 0.0

    with TemporaryDirectory() as td:
        # rp_id becomes the name of the packaged zip.
        tmp_dir = os.path.join(td, rp_id)
        bag = BagIt(tmp_dir)
        job_namefinder = self._NameFinder()
        res_namefinder = self._NameFinder()

        for output in output_objs:
            if mode == 0:
                # Only endpoint resources, subdirectoried by different outputs.
                if output.is_endpoint is False:
                    continue
                j_name = job_namefinder.find(output.run_job.workflow_job_id,
                                             output.run_job.job_name)
                opt_name = output.output_port_type_name
                op_dir = os.path.join(tmp_dir,
                                      "{0} - {1}".format(j_name, opt_name))
                rj_status = output.run_job.status
                if rj_status == task_status.FINISHED:
                    if output.resource is not None:
                        filepath = output.resource.resource_file.path
                        ext = os.path.splitext(filepath)[1]
                        # [TODO]: or... find the modified resource name if the
                        # resource_uuid still exists?
                        res_name = res_namefinder.find(output.resource_id,
                                                       output.resource.name)
                        result_filename = "{0}{1}".format(res_name, ext)
                        if not os.path.exists(op_dir):
                            os.makedirs(op_dir)
                        shutil.copyfile(filepath,
                                        os.path.join(op_dir, result_filename))
                    elif output.resource_list is not None:
                        res_name = res_namefinder.find(
                            output.resource_list_id, output.resource_list.name)
                        result_foldername = "{0}.list".format(res_name)
                        result_folder = os.path.join(op_dir, result_foldername)
                        if not os.path.exists(result_folder):
                            os.makedirs(result_folder)
                        # Zero-pad the index so files sort in resource order.
                        cnt = output.resource_list.resources.count()
                        zfills = len(str(cnt))
                        for idx, r in enumerate(
                                output.resource_list.resources.all()):
                            filepath = r.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            new_filename = "{0}{1}".format(
                                str(idx).zfill(zfills), ext)
                            shutil.copyfile(
                                filepath,
                                os.path.join(result_folder, new_filename))
            elif mode == 1:
                # All resources, one directory per resource.
                # NOTE(review): assumes output.resource is not None here;
                # a resource-list-only output would raise AttributeError.
                res_name = res_namefinder.find(output.resource_id,
                                               output.resource.name)
                res_dir = os.path.join(tmp_dir, res_name)
                j_name = job_namefinder.find(output.run_job.workflow_job_id,
                                             output.run_job.job_name)
                opt_name = output.output_port_type_name
                rj_status = output.run_job.status
                if rj_status == task_status.FINISHED:
                    if output.resource is not None:
                        filepath = output.resource.resource_file.path
                        ext = os.path.splitext(filepath)[1]
                        result_filename = "{0} - {1}{2}".format(
                            j_name, opt_name, ext)
                        if not os.path.exists(res_dir):
                            os.makedirs(res_dir)
                        shutil.copyfile(filepath,
                                        os.path.join(res_dir, result_filename))
                    elif output.resource_list is not None:
                        result_foldername = "{0} - {1}.list".format(
                            j_name, opt_name)
                        result_folder = os.path.join(res_dir,
                                                     result_foldername)
                        if not os.path.exists(result_folder):
                            os.makedirs(result_folder)
                        cnt = output.resource_list.resources.count()
                        zfills = len(str(cnt))
                        for idx, r in enumerate(
                                output.resource_list.resources.all()):
                            filepath = r.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            new_filename = "{0}{1}".format(
                                str(idx).zfill(zfills), ext)
                            shutil.copyfile(
                                filepath,
                                os.path.join(result_folder, new_filename))
                elif rj_status == task_status.FAILED:
                    # Record the failure as a text file in the package.
                    result_filename = "{0} - {1} - ERROR.txt".format(
                        j_name, opt_name)
                    if not os.path.exists(res_dir):
                        os.makedirs(res_dir)
                    with open(os.path.join(res_dir, result_filename),
                              "w") as f:
                        f.write("Error Summary: ")
                        f.write(output.run_job.error_summary)
                        f.write("\n\nError Details:\n")
                        f.write(output.run_job.error_details)
            elif mode == 2:
                raise NotImplementedError()  # [TODO]
            else:
                raise ValueError("mode {0} is not supported".format(mode))

            completed += percentage_increment
            rp_query.update(percent_completed=int(completed))

        bag.update()
        errors = bag.validate()
        # BUG FIX: is_valid is a method (tests call bag.is_valid()); the bare
        # attribute was always truthy, so this branch could never fire.
        if not bag.is_valid():
            rp_query.update(
                status=task_status.FAILED,
                error_summary="The bag failed validation.",
                error_details=str(errors),
            )
            # BUG FIX: without this return the task fell through and
            # overwrote FAILED with FINISHED below.
            return False

        target_dir_name = os.path.dirname(package_path)
        if not os.path.isdir(target_dir_name):
            os.makedirs(target_dir_name)
        bag.package(target_dir_name, method="zip")
        rp_query.update(status=task_status.FINISHED, percent_completed=100)

        # Schedule expiry of the package if an expiry time was requested.
        expiry_time = rp_query.values_list("expiry_time", flat=True)[0]
        if expiry_time:
            async_task = registry.tasks[
                "rodan.core.expire_package"].apply_async((rp_id, ),
                                                         eta=expiry_time,
                                                         queue="celery")
            expire_task_id = async_task.task_id
        else:
            expire_task_id = None
        rp_query.update(celery_task_id=expire_task_id)
        return True
def setUp(self):
    """Open the shared test bag used by every test in this case."""
    test_bag_path = os.path.join(os.getcwd(), 'test', 'testbag')
    self.bag = BagIt(test_bag_path)
class ManifestTest(unittest.TestCase):
    """Manifest file and checksum tests for sha1 and md5 encodings."""

    def setUp(self):
        self.bag = BagIt(os.path.join(os.getcwd(), "test", "testbag"))

    def set_hash_md5(self):
        # NOTE(review): lacks the "test_" prefix, so unittest never runs it.
        self.bag.set_hash_encoding("md5")
        # BUG FIX: was self.hash_encoding, which does not exist on the
        # TestCase; the attribute belongs to the bag.
        self.assertEqual(self.bag.hash_encoding, "md5")

    def set_hash_sha1(self):
        self.bag.set_hash_encoding("sha1")
        self.assertEqual(self.bag.hash_encoding, "sha1")

    def test_sha1(self):
        self.bag.set_hash_encoding("sha1")
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join("data", "subdir",
                                                    "subsubdir", "angry.jpg")],
            "c5913ae67aa40398f1182e52d2fa2c2e4c08f696",
        )

    def test_md5(self):
        self.bag.set_hash_encoding("md5")
        self.bag.update()
        self.assertEqual(
            self.bag.manifest_contents[os.path.join("data", "subdir",
                                                    "subsubdir", "angry.jpg")],
            "5f294603675cb6c0f83cef9316bb5be7",
        )

    def test_sha1_manifest(self):
        self.bag.set_hash_encoding("sha1")
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         "manifest-sha1.txt")

    def test_md5_manifest(self):
        self.bag.set_hash_encoding("md5")
        self.bag.update()
        self.assertEqual(os.path.basename(self.bag.manifest_file),
                         "manifest-md5.txt")
def setUp(self):
    """Open the shared test bag for this test case."""
    self.bag = BagIt(os.path.join(os.getcwd(), "test", "testbag"))
def setUp(self):
    """Announce setup and open the shared test bag."""
    # BUG FIX: Python-2-only print statement; the function-call form
    # behaves identically on both Python 2 and 3 for a single argument.
    print("setting up manifest.")
    self.bag = BagIt(os.path.join(os.getcwd(), 'test', 'testbag'))