def testMigrateStoreWithSpaces(self):
    dest = Location.get_location('test')
    local = Location.get_location('local')

    datafile, replica = generate_datafile('1/1/Hi Mum', self.dataset,
                                          "Hi mum")
    datafile2, replica2 = generate_datafile('1/1/Hi Dad', self.dataset,
                                            "Hi dad")

    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))
    # (was checking datafile's path twice; the second path belongs to
    # datafile2)
    path2 = datafile2.get_absolute_filepath()
    self.assertTrue(os.path.exists(path2))

    # Migrate them
    migrate_replica(replica, dest)
    self.assertFalse(os.path.exists(path))
    migrate_replica(replica2, dest)
    self.assertFalse(os.path.exists(path2))

    # Bring them back
    migrate_replica(datafile.get_preferred_replica(), local)
    self.assertTrue(os.path.exists(path))
    migrate_replica(datafile2.get_preferred_replica(), local)
    self.assertTrue(os.path.exists(path2))
def setUp(self):
    self.user = generate_user('fred')
    Location.force_initialize()
    self.experiment = generate_experiment(users=[self.user])
    self.dataset = generate_dataset(experiments=[self.experiment])
    self.server = SimpleHttpTestServer()
    self.server.start()
def testReplicaVerify(self):
    from django.conf import settings
    saved = settings.REQUIRE_DATAFILE_CHECKSUMS
    try:
        Location.get_location('test')
        datafile, replica = generate_datafile("1/2/3", self.dataset,
                                              "Hi mum")
        settings.REQUIRE_DATAFILE_CHECKSUMS = True
        self.assertTrue(replica.verify(), 'Replica.verify() failed.')
        replica.datafile.sha512sum = ''
        replica.datafile.md5sum = ''
        self.assertFalse(
            replica.verify(),
            'Replica.verify() succeeded despite no checksum '
            '(settings.REQUIRE_DATAFILE_CHECKSUMS=True).')
        self.assertFalse(
            replica.verify(allowEmptyChecksums=False),
            'Replica.verify() succeeded despite no checksum '
            '(allowEmptyChecksums=False)')
        settings.REQUIRE_DATAFILE_CHECKSUMS = False
        datafile.sha512sum = None
        datafile.md5sum = None
        self.assertTrue(
            replica.verify(allowEmptyChecksums=True),
            'Replica.verify() failed wrongly '
            '(allowEmptyChecksums=True)')
        datafile.sha512sum = None
        datafile.md5sum = None
        self.assertTrue(replica.verify(),
                        'Replica.verify() failed wrongly')
    finally:
        settings.REQUIRE_DATAFILE_CHECKSUMS = saved
def test_location(self):
    from tardis.tardis_portal.models import Location
    self.assertEquals(Location.get_default_location().name, 'local')
    self.assertEquals(Location.get_location('staging').name, 'staging')
    self.assertEquals(len(Location.objects.all()), 6)
def setUp(self):
    from tardis.tardis_portal import models
    from tempfile import mkdtemp, mktemp
    from django.conf import settings
    from os import path
    import os

    # Disconnect post_save signal
    from django.db.models.signals import post_save
    from tardis.tardis_portal.models import \
        staging_hook, Dataset_File, Replica, Location
    post_save.disconnect(staging_hook, sender=Replica)

    from django.contrib.auth.models import User
    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)

    try:
        os.makedirs(settings.GET_FULL_STAGING_PATH_TEST)
    except OSError:
        pass
    self.temp = mkdtemp(dir=settings.GET_FULL_STAGING_PATH_TEST)

    self.file = mktemp(dir=self.temp)
    content = 'test file'
    with open(self.file, "w+b") as f:
        f.write(content)

    Location.force_initialize()

    # make datafile
    exp = models.Experiment(title='test exp1',
                            institution_name='monash',
                            created_by=self.user)
    exp.save()

    # make dataset
    dataset = models.Dataset(description="dataset description...")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()

    # create datafile
    df = models.Dataset_File(dataset=dataset, size=len(content),
                             filename=path.basename(self.file),
                             md5sum='f20d9f2072bbeb6691c0f9c5099b01f3')
    df.save()

    # create replica
    base_url = 'file://' + settings.GET_FULL_STAGING_PATH_TEST
    location = Location.load_location({
        'name': 'staging-test-yyy', 'url': base_url,
        'type': 'external', 'priority': 10,
        'transfer_provider': 'local'})
    replica = models.Replica(datafile=df, url='file://' + self.file,
                             protocol="staging", location=location)
    replica.verify()
    replica.save()
    self.replica = replica
def setUp(self):
    # Create test owner without enough details
    username, email, password = ('testuser',
                                 '*****@*****.**',
                                 'password')
    user = User.objects.create_user(username, email, password)
    profile = UserProfile(user=user, isDjangoAccount=True)
    profile.save()

    Location.force_initialize()

    # Create test experiment and make user the owner of it
    experiment = Experiment(title='Text Experiment',
                            institution_name='Test Uni',
                            created_by=user)
    experiment.save()
    acl = ObjectACL(
        pluginId='django_user',
        entityId=str(user.id),
        content_object=experiment,
        canRead=True,
        canWrite=True,
        canDelete=True,
        isOwner=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED)
    acl.save()

    dataset = Dataset(description='dataset description...')
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    def create_datafile(filename):
        testfile = path.join(path.dirname(__file__), 'fixtures', filename)
        size, sha512sum = get_size_and_sha512sum(testfile)

        datafile = Dataset_File(dataset=dataset,
                                filename=path.basename(testfile),
                                size=size,
                                sha512sum=sha512sum)
        datafile.save()
        base_url = 'file://' + path.abspath(path.dirname(testfile))
        location = Location.load_location({
            'name': 'test-grabber', 'url': base_url, 'type': 'external',
            'priority': 10, 'transfer_provider': 'local'})
        replica = Replica(datafile=datafile,
                          url='file://' + path.abspath(testfile),
                          protocol='file',
                          location=location)
        replica.verify()
        replica.save()
        return Dataset_File.objects.get(pk=datafile.pk)

    self.dataset = dataset
    self.datafiles = [create_datafile('data_grabber_test1.admin'),
                      create_datafile('testfile.txt')]
def setUp(self):
    from os import path, mkdir
    from tempfile import mkdtemp

    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)
    self.userProfile = UserProfile(user=self.user).save()

    self.test_dir = mkdtemp()

    Location.force_initialize()

    self.exp = Experiment(title='test exp1',
                          institution_name='monash',
                          created_by=self.user)
    self.exp.save()

    acl = ObjectACL(
        pluginId=django_user,
        entityId=str(self.user.id),
        content_object=self.exp,
        canRead=True,
        isOwner=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    )
    acl.save()

    self.dataset = Dataset(description='dataset description...')
    self.dataset.save()
    self.dataset.experiments.add(self.exp)
    self.dataset.save()

    self.experiment_path = path.join(
        settings.FILE_STORE_PATH,
        str(self.dataset.get_first_experiment().id))
    self.dataset_path = path.join(self.experiment_path,
                                  str(self.dataset.id))
    if not path.exists(self.experiment_path):
        mkdir(self.experiment_path)
    if not path.exists(self.dataset_path):
        mkdir(self.dataset_path)

    # write test file
    self.filename = 'testfile.txt'
    self.f1 = open(path.join(self.test_dir, self.filename), 'w')
    self.f1.write('Test file 1')
    self.f1.close()
    self.f1_size = path.getsize(path.join(self.test_dir, self.filename))
    self.f1 = open(path.join(self.test_dir, self.filename), 'r')
def setUp(self):
    # Create test owner without enough details
    username, email, password = ('testuser',
                                 '*****@*****.**',
                                 'password')
    user = User.objects.create_user(username, email, password)
    profile = UserProfile(user=user, isDjangoAccount=True)
    profile.save()

    Location.force_initialize()

    # Create test experiment and make user the owner of it
    experiment = Experiment(title='Text Experiment',
                            institution_name='Test Uni',
                            created_by=user)
    experiment.save()
    acl = ObjectACL(
        pluginId='django_user',
        entityId=str(user.id),
        content_object=experiment,
        canRead=True,
        isOwner=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    )
    acl.save()

    dataset = Dataset(description='dataset description...')
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    def create_datafile(index):
        testfile = path.join(path.dirname(__file__), 'fixtures',
                             'jeol_sem_test%d.txt' % index)
        size, sha512sum = get_size_and_sha512sum(testfile)

        datafile = Dataset_File(dataset=dataset,
                                filename=path.basename(testfile),
                                size=size,
                                sha512sum=sha512sum)
        datafile.save()
        base_url = 'file://' + path.abspath(path.dirname(testfile))
        location = Location.load_location({
            'name': 'test-jeol',
            'url': base_url,
            'type': 'external',
            'priority': 10,
            'transfer_provider': 'local'})
        replica = Replica(datafile=datafile,
                          url='file://' + path.abspath(testfile),
                          protocol='file',
                          location=location)
        replica.verify()
        replica.save()
        return Dataset_File.objects.get(pk=datafile.pk)

    self.dataset = dataset
    self.datafiles = [create_datafile(i) for i in (1, 2)]
def setUp(self):
    from django.contrib.auth.models import User
    from tardis.tardis_portal.models import Location

    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)
    Location.force_initialize()
def setUp(self):
    self.user = generate_user('fred')
    Location.force_initialize()
    self.experiment = generate_experiment(
        users=[self.user],
        title='Meanwhile, down in the archives ...',
        url='http://example.com/something')
    self.dataset = generate_dataset(experiments=[self.experiment])
def _infer_location(path):
    if urlparse.urlparse(path).scheme == '':
        loc = Location.get_default_location()
    else:
        loc = Location.get_location_for_url(path)
    if loc:
        return loc
    else:
        raise Exception('Cannot infer a location for %s' % path)
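# A usage sketch for _infer_location (the example arguments are
# hypothetical): a scheme-less filesystem path resolves to the default
# location, while a full URL is matched against the registered locations
# via Location.get_location_for_url.
#
#     _infer_location('1/2/3.txt')      # -> Location.get_default_location()
#     _infer_location('http://host/x')  # -> the matching registered
#                                       #    Location, else Exception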
def testRemoveExperimentData(self):
    # First with no sharing
    self._build()
    archive_location = Location.get_location('archtest')
    try:
        nos_experiments = Experiment.objects.count()
        nos_datasets = Dataset.objects.count()
        nos_datafiles = Dataset_File.objects.count()
        nos_replicas = Replica.objects.count()
        self.assertTrue(exists(self.replica.get_absolute_filepath()))
        remove_experiment_data(self.experiment,
                               'http://example.com/some.tar.gz',
                               archive_location)
        self.assertEquals(nos_experiments, Experiment.objects.count())
        self.assertEquals(nos_datasets, Dataset.objects.count())
        self.assertEquals(nos_datafiles, Dataset_File.objects.count())
        self.assertEquals(nos_replicas, Replica.objects.count())
        new_replica = self.datafile.get_preferred_replica()
        self.assertTrue(self.replica.id != new_replica.id)
        self.assertFalse(new_replica.stay_remote)
        self.assertTrue(new_replica.verified)
        self.assertEqual(self.replica.protocol, new_replica.protocol)
        self.assertEqual(archive_location.id, new_replica.location.id)
        self.assertEqual('http://example.com/some.tar.gz#1/1/1',
                         new_replica.url)
        self.assertFalse(exists(self.replica.get_absolute_filepath()))
    finally:
        self._clear()

    # (Check that the deletes cascaded ... )
    self.assertEquals(0, Dataset_File.objects.count())
    self.assertEquals(0, Replica.objects.count())

    # Repeat, but with the first dataset in 2 experiments.
    self._build()
    self.dataset.experiments.add(self.experiment2)
    archive_location = Location.get_location('archtest')
    try:
        nos_experiments = Experiment.objects.count()
        nos_datasets = Dataset.objects.count()
        nos_datafiles = Dataset_File.objects.count()
        nos_replicas = Replica.objects.count()
        self.assertTrue(exists(self.replica.get_absolute_filepath()))
        remove_experiment_data(self.experiment,
                               'http://example.com/some.tar.gz',
                               archive_location)
        self.assertEquals(nos_experiments, Experiment.objects.count())
        self.assertEquals(nos_datasets, Dataset.objects.count())
        self.assertEquals(nos_datafiles, Dataset_File.objects.count())
        self.assertEquals(nos_replicas, Replica.objects.count())
        new_replica = self.datafile.get_preferred_replica()
        self.assertTrue(self.replica.id == new_replica.id)
        self.assertTrue(exists(self.replica.get_absolute_filepath()))
        self.assertFalse(exists(self.replica2.get_absolute_filepath()))
    finally:
        self._clear()
def setUpClass(cls):
    cls.priorcwd = os.getcwd()
    os.chdir(os.path.dirname(__file__) + '/atom_test')
    cls.server = TestWebServer()
    cls.server.start()

    Location.force_initialize()
    Location.load_location({
        'name': 'test-atom',
        'transfer_provider': 'http',
        'url': 'http://localhost:4272/files/',
        'type': 'external',
        'priority': 10})
    Location.load_location({
        'name': 'test-atom2',
        'transfer_provider': 'http',
        'url': 'http://mydatagrabber.cmm.uq.edu.au/files',
        'type': 'external',
        'priority': 10})
    files = path.realpath(path.join(path.dirname(__file__),
                                    'atom_test', 'files'))
    Location.load_location({
        'name': 'test-atom3',
        'transfer_provider': 'local',
        'url': 'file://' + files,
        'type': 'external',
        'priority': 10})
def testScoring(self):
    self._setup()
    scorer = MigrationScorer(Location.get_location('local').id)
    self.assertEquals(2.0, scorer.datafile_score(self.df1))
    self.assertEquals(2, get_user_priority(self.user1))
    self.assertEquals(1, get_user_priority(self.user2))
    self.assertEquals(1.0, scorer.user_score(self.user1))
    self.assertEquals(2.0, scorer.user_score(self.user2))
    self.assertEquals(2.0, scorer.experiment_score(self.exp1))
    self.assertEquals(2.0, scorer.dataset_score(self.df1.dataset))
    self.assertEquals(4.0, scorer.score_datafile(self.df1))
    self.assertEquals([(self.df1, self.rep1, 4.0)],
                      scorer.score_datafiles_in_dataset(self.ds1))
    self.assertEquals([(self.df5, self.rep5, 8.0),
                       (self.df4, self.rep4, 6.0),
                       (self.df1, self.rep1, 4.0)],
                      scorer.score_datafiles_in_experiment(self.exp1))
    self.assertEquals([(self.df5, self.rep5, 8.0),
                       (self.df4, self.rep4, 6.0)],
                      scorer.score_datafiles_in_experiment(self.exp2))
    self.assertEquals([(self.df6, self.rep6, 5.0)],
                      scorer.score_datafiles_in_experiment(self.exp3))
    self.assertEquals([(self.df5, self.rep5, 8.0),
                       (self.df4, self.rep4, 6.0),
                       (self.df6, self.rep6, 5.0),
                       (self.df1, self.rep1, 4.0),
                       (self.df7, self.rep7, 0.0),
                       (self.df8, self.rep8, 0.0)],
                      scorer.score_all_datafiles())
    self.assertEquals([(self.df7, self.rep7, 0.0),
                       (self.df8, self.rep8, 0.0)],
                      scorer.score_datafiles_in_dataset(self.ds4))
def testScoringWithTimes(self):
    self._setup()
    scorer = MigrationScorer(
        Location.get_location('local').id,
        {'user_priority_weighting': [5.0, 2.0, 1.0, 0.5, 0.2],
         'file_size_weighting': 1.0,
         'file_access_weighting': 1.0,
         'file_age_weighting': 1.0,
         'file_size_threshold': 0,
         'file_access_threshold': 0,
         'file_age_threshold': 1})
    self.assertEquals(0.0, scorer.datafile_score(self.df1))

    f = tempfile.NamedTemporaryFile(dir=settings.FILE_STORE_PATH)
    f.write("Hi Mom!!\n")
    rep = Replica.objects.get(pk=self.rep1.pk)
    rep.url = f.name
    rep.save()
    self.assertEquals(2.0, scorer.datafile_score(self.df1))

    older = time.time() - (60 * 60 * 24 + 300)
    os.utime(f.name, (older, older))
    self.assertEquals(3.0, scorer.datafile_score(self.df1))

    older = time.time() - (60 * 60 * 24 * 2 + 300)
    os.utime(f.name, (older, older))
    self.assertEquals(5.0, scorer.datafile_score(self.df1))

    f.close()
def stage_replica(replica):
    from django.core.files.uploadedfile import TemporaryUploadedFile
    from tardis.tardis_portal.models import Replica, Location

    if not replica.location.type == 'external':
        raise ValueError('Only external replicas can be staged')

    if getattr(settings, "DEEP_DATASET_STORAGE", False):
        # Strip the 'file://' prefix, take the path relative to the sync
        # staging area, then drop the leading (dataset) component to get
        # the sub-directory to recreate in the store.
        relurl = path.relpath(replica.url[7:], settings.SYNC_TEMP_PATH)
        spliturl = relurl.split(os.sep)[1:]
        subdir = path.dirname(path.join(*spliturl))
    else:
        subdir = None

    with TemporaryUploadedFile(replica.datafile.filename,
                               None, None, None) as tf:
        if replica.verify(tempfile=tf.file):
            if not replica.stay_remote:
                tf.file.flush()
                target_replica = {
                    'datafile': replica.datafile,
                    'url': write_uploaded_file_to_dataset(
                        replica.datafile.dataset, tf, subdir=subdir),
                    'location': Location.get_default_location(),
                    'verified': True,
                    'protocol': ''}
                Replica.objects.filter(id=replica.id).update(
                    **target_replica)
            return True
        else:
            return False
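# Worked example (hypothetical paths, assuming SYNC_TEMP_PATH is
# '/var/sync/temp') of the DEEP_DATASET_STORAGE subdir computation in
# stage_replica above; replica.url[7:] strips the 'file://' prefix and the
# first path component (the dataset directory) is discarded:
import os
from os import path

def _example_subdir(url, sync_temp_path='/var/sync/temp'):
    relurl = path.relpath(url[7:], sync_temp_path)  # 'ds1/sub/a.txt'
    spliturl = relurl.split(os.sep)[1:]             # ['sub', 'a.txt']
    return path.dirname(path.join(*spliturl))       # 'sub'

# _example_subdir('file:///var/sync/temp/ds1/sub/a.txt') == 'sub'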
def remove_experiment_data(exp, archive_url, archive_location):
    """Remove the online Replicas for an Experiment that are not shared
    with other Experiments.  When Replicas are removed, they are replaced
    with offline replicas whose 'url' consists of the archive_url, with
    the archive pathname for the datafile as a url fragment id.
    """
    for ds in Dataset.objects.filter(experiments=exp):
        if ds.experiments.count() == 1:
            for df in Dataset_File.objects.filter(dataset=ds):
                replicas = Replica.objects.filter(datafile=df,
                                                  location__type='online')
                if replicas.count() > 0:
                    for replica in replicas:
                        location = Location.get_location(
                            replica.location.name)
                        location.provider.remove_file(replica)
                    if archive_url:
                        old_replica = replicas[0]
                        path_in_archive = '%s/%s/%s' % (
                            exp.id, ds.id, df.filename)
                        new_replica_url = '%s#%s' % (
                            archive_url, quote(path_in_archive))
                        new_replica = Replica(
                            datafile=old_replica.datafile,
                            url=new_replica_url,
                            protocol=old_replica.protocol,
                            verified=True,
                            stay_remote=False,
                            location=archive_location)
                        new_replica.save()
                    replicas.delete()
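# For illustration, with the values used in testRemoveExperimentData above
# (exp.id == 1, ds.id == 1, df.filename == '1') and an archive_url of
# 'http://example.com/some.tar.gz', the offline replacement replica gets
# the url
#
#     'http://example.com/some.tar.gz#1/1/1'
#
# i.e. the datafile's pathname within the archive rides in the URL
# fragment.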
def testLocalFile(self):
    content = urandom(1024)
    cf = ContentFile(content, 'background_task_testfile')

    # Create new Datafile
    datafile = Dataset_File(dataset=self.dataset)
    datafile.filename = cf.name
    datafile.size = len(content)
    datafile.sha512sum = hashlib.sha512(content).hexdigest()
    datafile.save()
    replica = Replica(
        datafile=datafile,
        url=write_uploaded_file_to_dataset(self.dataset, cf),
        location=Location.get_default_location())
    replica.save()

    def get_replica(datafile):
        return Replica.objects.get(datafile=datafile)

    # undo auto-verify:
    replica.verified = False
    replica.save(update_fields=['verified'])

    # Check that it's not currently verified
    expect(get_replica(datafile).verified).to_be(False)

    # Check it verifies
    verify_files()
    expect(get_replica(datafile).verified).to_be(True)
def process_enclosure(self, dataset, enclosure):
    filename = getattr(enclosure, 'title', basename(enclosure.href))
    datafile = Dataset_File(filename=filename, dataset=dataset)
    try:
        datafile.mimetype = enclosure.mime
    except AttributeError:
        pass
    try:
        datafile.size = enclosure.length
    except AttributeError:
        pass
    try:
        hash = enclosure.hash
        # Split on white space, then ':' to get tuples to feed into dict
        hashdict = dict([s.partition(':')[::2] for s in hash.split()])
        # Set SHA-512 sum
        datafile.sha512sum = hashdict['sha-512']
    except AttributeError:
        pass
    datafile.save()

    url = enclosure.href
    # This means we will allow the atom feed to feed us any enclosure
    # URL that matches a registered location.  Maybe we should restrict
    # this to a specific location.
    location = Location.get_location_for_url(url)
    if not location:
        logger.error('Rejected ingestion for unknown location %s' % url)
        return

    replica = Replica(datafile=datafile, url=url, location=location)
    replica.protocol = enclosure.href.partition('://')[0]
    replica.save()
    self.make_local_copy(replica)
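# Worked example of the enclosure hash parsing in process_enclosure above
# (the digest values are made up): the hash attribute is expected to hold
# whitespace-separated 'algorithm:digest' tokens.
#
#     hash = 'md5:d41d8cd9 sha-512:cf83e135'
#     dict([s.partition(':')[::2] for s in hash.split()])
#     # -> {'md5': 'd41d8cd9', 'sha-512': 'cf83e135'}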
def testMigrateRestore(self):
    dest = Location.get_location('test')
    local = Location.get_location('local')

    datafile, replica = generate_datafile(None, self.dataset, "Hi mum",
                                          verify=False,
                                          verify_checksums_req=True)

    # Attempt to migrate without datafile hashes ... should
    # fail because we can't verify.
    with self.assertRaises(MigrationError):
        migrate_replica(replica, dest)

    # Verify sets hashes ...
    self.assertEquals(replica.verify(allowEmptyChecksums=True), True)
    replica = Replica.objects.get(pk=replica.pk)
    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))

    self.assertTrue(migrate_replica(replica, dest))
    self.assertFalse(os.path.exists(path))

    # Bring it back
    new_replica = datafile.get_preferred_replica()
    url = new_replica.url
    self.assertTrue(migrate_replica(new_replica, local))
    self.assertTrue(os.path.exists(path))

    # Check it was deleted remotely
    with self.assertRaises(TransferError):
        dest.provider.get_length(new_replica)

    # Refresh the datafile object because it is now stale ...
    datafile = Dataset_File.objects.get(id=datafile.id)
    replica = datafile.get_preferred_replica()

    # Repeat the process with 'noRemove'
    self.assertTrue(migrate_replica(replica, dest, noRemove=True))
    new_replica = datafile.get_preferred_replica()
    self.assertTrue(os.path.exists(path))
    self.assertEquals(dest.provider.get_length(new_replica), 6)
    migrate_replica(new_replica, local, noRemove=True)
    newpath = datafile.get_absolute_filepath()
    replica = datafile.get_preferred_replica()
    self.assertTrue(os.path.exists(path))
    self.assertTrue(os.path.exists(newpath))
    self.assertNotEqual(path, newpath)
    self.assertEquals(dest.provider.get_length(new_replica), 6)
def setUp(self):
    # Create test owner without enough details
    username, email, password = ('testuser',
                                 '*****@*****.**',
                                 'password')
    user = User.objects.create_user(username, email, password)
    profile = UserProfile(user=user, isDjangoAccount=True)
    profile.save()
    # Need UserAuthentication
    UserAuthentication(userProfile=profile,
                       username=username,
                       authenticationMethod='localdb').save()
    # Create staging dir
    from os import path, makedirs
    staging_dir = path.join(settings.STAGING_PATH, username)
    if not path.exists(staging_dir):
        makedirs(staging_dir)
    # Ensure that staging dir is set up properly
    expect(get_full_staging_path(username)).to_be_truthy()

    Location.force_initialize()

    # Create test experiment and make user the owner of it
    experiment = Experiment(title='Text Experiment',
                            institution_name='Test Uni',
                            created_by=user)
    experiment.save()
    acl = ExperimentACL(
        pluginId=django_user,
        entityId=str(user.id),
        experiment=experiment,
        canRead=True,
        isOwner=True,
        aclOwnershipType=ExperimentACL.OWNER_OWNED,
    )
    acl.save()

    self.dataset = Dataset(description='dataset description...')
    self.dataset.save()
    self.dataset.experiments.add(experiment)
    self.dataset.save()

    self.username, self.password = (username, password)
def do_ext_provider(self, loc_name):
    # This test requires an external test server configured as per the
    # 'loc_name' location.  We skip the test if the server doesn't
    # respond.
    loc = Location.get_location(loc_name)
    if loc.provider.alive():
        self.do_provider(loc)
    else:
        print "SKIPPING TEST - %s server on %s is not responding\n" % \
            (loc_name, loc.url)
def testArchiveExperiment(self):
    dataset = generate_dataset()
    experiment = generate_experiment([dataset], [self.dummy_user])
    datafile, _ = generate_datafile(None, dataset, "Hi grandpa")
    archtest = Location.get_location('archtest')

    # Dry run ...
    out = StringIO()
    try:
        call_command('archive', experiment.id,
                     verbosity=1, stdout=out, dryRun=True)
    except SystemExit:
        pass
    out.seek(0)
    self.assertEquals(out.read(),
                      'Would have archived experiment %s\n' %
                      experiment.id)

    # Dry run ... all
    out = StringIO()
    try:
        call_command('archive', all=True,
                     verbosity=1, stdout=out, dryRun=True)
    except SystemExit:
        pass
    out.seek(0)
    self.assertEquals(out.read(),
                      'Would have archived experiment %s\n' %
                      experiment.id)

    # Do one ... to file
    out = StringIO()
    try:
        call_command('archive', experiment.id, directory='/tmp',
                     verbosity=1, stdout=out)
    except SystemExit:
        pass
    out.seek(0)
    self.assertEquals(
        out.read(),
        'Archived experiment %s to /tmp/%s-archive.tar.gz\n'
        'Archived 1 experiments with 0 errors\n' %
        (experiment.id, experiment.id))

    # Do one ... to archtest
    out = StringIO()
    try:
        call_command('archive', experiment.id, location='archtest',
                     verbosity=1, stdout=out)
    except SystemExit:
        pass
    out.seek(0)
    self.assertEquals(
        out.read(),
        'Archived experiment %s to %s%s-archive.tar.gz\n'
        'Archived 1 experiments with 0 errors\n' %
        (experiment.id, archtest.provider.base_url, experiment.id))
def setUp(self):
    # Create test owner without enough details
    username, email, password = ('testuser',
                                 '*****@*****.**',
                                 'password')
    user = User.objects.create_user(username, email, password)
    profile = UserProfile(user=user, isDjangoAccount=True)
    profile.save()
    # Need UserAuthentication
    UserAuthentication(userProfile=profile,
                       username=username,
                       authenticationMethod='localdb').save()
    # Create staging dir
    from os import path, makedirs
    staging_dir = path.join(settings.STAGING_PATH, username)
    if not path.exists(staging_dir):
        makedirs(staging_dir)
    # Ensure that staging dir is set up properly
    expect(get_full_staging_path(username)).to_be_truthy()

    Location.force_initialize()

    # Create test experiment and make user the owner of it
    experiment = Experiment(title='Text Experiment',
                            institution_name='Test Uni',
                            created_by=user)
    experiment.save()
    acl = ObjectACL(
        pluginId=django_user,
        entityId=str(user.id),
        content_object=experiment,
        canRead=True,
        isOwner=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    )
    acl.save()

    self.dataset = Dataset(description='dataset description...')
    self.dataset.save()
    self.dataset.experiments.add(experiment)
    self.dataset.save()

    self.username, self.password = (username, password)
def _get_destination(self, destName, default):
    if not destName:
        if not default:
            raise CommandError("No default destination configured")
        else:
            destName = default
    dest = Location.get_location(destName)
    if not dest:
        raise CommandError("Destination %s not known" % destName)
    return dest
def _build(dataset, filename, url, protocol):
    from tardis.tardis_portal.models import \
        Dataset_File, Replica, Location
    datafile = Dataset_File(dataset=dataset, filename=filename)
    datafile.save()
    replica = Replica(datafile=datafile, url=url, protocol=protocol,
                      location=Location.get_default_location())
    replica.save()
    return datafile
def _create_datafile():
    user = User.objects.create_user('testuser', '*****@*****.**', 'pwd')
    user.save()
    UserProfile(user=user).save()

    Location.force_initialize()

    full_access = Experiment.PUBLIC_ACCESS_FULL
    experiment = Experiment.objects.create(title="IIIF Test",
                                           created_by=user,
                                           public_access=full_access)
    experiment.save()
    ObjectACL(content_object=experiment,
              pluginId='django_user',
              entityId=str(user.id),
              isOwner=True,
              canRead=True,
              canWrite=True,
              canDelete=True,
              aclOwnershipType=ObjectACL.OWNER_OWNED).save()

    dataset = Dataset()
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    # Create new Datafile
    tempfile = TemporaryUploadedFile('iiif_stored_file', None, None, None)
    with Image(filename='magick:rose') as img:
        img.format = 'tiff'
        img.save(file=tempfile.file)
        tempfile.file.flush()
    datafile = Dataset_File(dataset=dataset,
                            size=os.path.getsize(tempfile.file.name),
                            filename='iiif_named_file')
    replica = Replica(
        datafile=datafile,
        url=write_uploaded_file_to_dataset(dataset, tempfile),
        location=Location.get_default_location())
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
    return datafile
def testMigrationNoHashes(self):
    # Tweak the server to turn off the '?metadata' query
    self.server.server.allowQuery = False

    dest = Location.get_location('test')
    datafile, replica = generate_datafile("1/2/3", self.dataset, "Hi mum")
    self.assertEquals(replica.verify(allowEmptyChecksums=True), True)
    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))

    migrate_replica(replica, dest)
    self.assertFalse(os.path.exists(path))
def _build_datafile(self, testfile, filename, dataset, url,
                    protocol='', checksum=None, size=None, mimetype=''):
    filesize, sha512sum = get_size_and_sha512sum(testfile)
    datafile = Dataset_File(
        dataset=dataset, filename=filename, mimetype=mimetype,
        size=str(size if size is not None else filesize),
        sha512sum=(checksum if checksum else sha512sum))
    datafile.save()
    if urlparse.urlparse(url).scheme == '':
        location = Location.get_location('local')
    else:
        location = Location.get_location_for_url(url)
        if not location:
            location = Location.load_location({
                'name': filename,
                'url': urlparse.urljoin(url, '.'),
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local'})
    replica = Replica(datafile=datafile, protocol=protocol, url=url,
                      location=location)
    replica.verify()
    replica.save()
    return Dataset_File.objects.get(pk=datafile.pk)
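# Note on the fallback in _build_datafile above: urljoin(url, '.') trims a
# URL back to its containing 'directory', which becomes the base url of
# the ad-hoc location. For example (hypothetical URL):
#
#     urlparse.urljoin('http://host/data/file.txt', '.')
#     # -> 'http://host/data/'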
def testProviderInstantiation(self):
    '''
    Test that transfer_provider instantiation works
    '''
    provider = Location.get_location('test').provider
    self.assertIsInstance(provider, TransferProvider)
    self.assertIsInstance(provider, SimpleHttpTransfer)
    self.assertEqual(provider.base_url, 'http://127.0.0.1:4272/data/')

    provider = Location.get_location('test2').provider
    self.assertIsInstance(provider, TransferProvider)
    self.assertIsInstance(provider, WebDAVTransfer)
    self.assertFalse(401 in provider.opener.handle_error['http'])
    self.assertEqual(provider.base_url, 'http://127.0.0.1/data2/')

    provider = Location.get_location('test3').provider
    self.assertIsInstance(provider, TransferProvider)
    self.assertIsInstance(provider, WebDAVTransfer)
    self.assertTrue(401 in provider.opener.handle_error['http'])
    self.assertEqual(provider.base_url, 'http://127.0.0.1/data3/')
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(0, nosDatafiles):
        df_ = Dataset_File(dataset=ds_, size='21', sha512sum='bogus')
        df_.save()
        rep_ = Replica(datafile=df_,
                       url='http://planet-python.org/' + str(_next_id()),
                       location=Location.get_default_location())
        rep_.save()
    ds_.save()
    return ds_
def setUp(self):
    """ setting up essential objects, copied from tests above """
    Location.force_initialize()
    self.location = Location.get_location('local')

    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)
    self.userProfile = UserProfile(user=self.user).save()
    self.exp = Experiment(title='test exp1',
                          institution_name='monash',
                          created_by=self.user)
    self.exp.save()
    self.acl = ObjectACL(
        pluginId=django_user,
        entityId=str(self.user.id),
        content_object=self.exp,
        canRead=True,
        isOwner=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    )
    self.acl.save()
    self.dataset = Dataset(description='dataset description...')
    self.dataset.save()
    self.dataset.experiments.add(self.exp)
    self.dataset.save()

    self.dataset_file = Dataset_File(dataset=self.dataset,
                                     size=42,
                                     filename="foo",
                                     md5sum="junk")
    self.dataset_file.save()

    self.replica = Replica(datafile=self.dataset_file,
                           url="http://foo",
                           location=self.location,
                           verified=False)
    self.replica.save()
def _get_destination(self, destName, default):
    if not destName:
        if not default:
            raise CommandError("No default destination configured")
        else:
            destName = default
    try:
        dest = Location.get_location(destName)
        if not dest:
            raise CommandError("Destination %s not known" % destName)
        return dest
    except MigrationError as e:
        raise CommandError("Migration error: %s" % e.args[0])
def _setup(self):
    Location.force_initialize()
    self.user1 = generate_user('joe', 2)
    self.user2 = generate_user('fred', 1)
    self.exp1 = generate_experiment(users=[self.user1, self.user2])
    self.exp2 = generate_experiment(users=[self.user1])
    self.exp3 = generate_experiment(users=[self.user1])
    self.exp4 = generate_experiment(users=[self.user1])
    self.ds1 = generate_dataset(experiments=[self.exp1])
    self.ds2 = generate_dataset(experiments=[self.exp1, self.exp2])
    self.ds3 = generate_dataset(experiments=[self.exp3])
    self.ds4 = generate_dataset(experiments=[self.exp4])
    self.df1, self.rep1 = generate_datafile('1/2/1', self.ds1, size=100)
    self.df2, self.rep2 = generate_datafile('1/2/2', self.ds1, size=100,
                                            verified=False)
    self.df3, self.rep3 = generate_datafile(
        'http://127.0.0.1:4272/data/1/2/3', self.ds1, size=1000)
    self.df4, self.rep4 = generate_datafile('1/2/4', self.ds2, size=1000)
    self.df5, self.rep5 = generate_datafile('1/2/5', self.ds2, size=10000)
    self.df6, self.rep6 = generate_datafile('1/2/6', self.ds3,
                                            size=100000)
    self.df7, self.rep7 = generate_datafile('1/2/7', self.ds4, size=0)
    self.df8, self.rep8 = generate_datafile('1/2/8', self.ds4, size=-1)
def create_staging_datafile(filepath, username, dataset_id):
    from tardis.tardis_portal.models import Dataset_File, Dataset, \
        Replica, Location
    dataset = Dataset.objects.get(id=dataset_id)

    url, size = get_staging_url_and_size(username, filepath)
    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(filepath),
                            size=size)
    replica = Replica(datafile=datafile,
                      protocol='staging',
                      url=url,
                      location=Location.get_location('staging'))
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
def testMirror(self):
    dest = Location.get_location('test')
    datafile, replica = generate_datafile(None, self.dataset,
                                          "Hi granny")
    path = datafile.get_absolute_filepath()
    self.assertTrue(os.path.exists(path))

    dummy_replica = Replica()
    dummy_replica.datafile = datafile
    dummy_replica.location = Location.objects.get(name='test')
    dummy_replica.url = dummy_replica.generate_default_url()
    with self.assertRaises(TransferError):
        dest.provider.get_length(dummy_replica)

    self.assertTrue(migrate_replica(replica, dest, mirror=True))
    datafile = Dataset_File.objects.get(id=datafile.id)
    self.assertTrue(datafile.is_local())
    self.assertEquals(dest.provider.get_length(dummy_replica), 9)
def fpupload(request, dataset_id):
    """
    Uploads all files picked by filepicker to the dataset

    :param request: a HTTP Request instance
    :type request: :class:`django.http.HttpRequest`
    :param dataset_id: the dataset_id
    :type dataset_id: integer
    :returns: an HttpResponse whose JSON body reports success
    :rtype: :class:`django.http.HttpResponse`
    """
    dataset = Dataset.objects.get(id=dataset_id)
    logger.debug('called fpupload')

    if request.method == 'POST':
        logger.debug('got POST')
        for key, val in request.POST.items():
            splits = val.split(",")
            for url in splits:
                try:
                    fp = FilepickerFile(url)
                except ValueError:
                    pass
                else:
                    picked_file = fp.get_file()
                    filepath = write_uploaded_file_to_dataset(dataset,
                                                              picked_file)
                    datafile = Dataset_File(dataset=dataset,
                                            filename=picked_file.name,
                                            size=picked_file.size)
                    replica = Replica(
                        datafile=datafile,
                        url=filepath,
                        protocol='',
                        location=Location.get_default_location())
                    replica.verify(allowEmptyChecksums=True)
                    datafile.save()
                    replica.datafile = datafile
                    replica.save()

    return HttpResponse(json.dumps({"result": True}))
def migrate_replica(replica, location, noRemove=False, mirror=False):
    """
    Migrate the replica to a different storage location.  The overall
    effect will be that the datafile will be stored at the new location
    and removed from the current location, and the datafile metadata will
    be updated to reflect this.
    """
    from tardis.tardis_portal.models import Replica, Location

    with transaction.commit_on_success():
        replica = Replica.objects.select_for_update().get(pk=replica.pk)
        source = Location.get_location(replica.location.name)

        # (parenthesized so the check reads: raise unless the replica is
        # verified or the destination trusts file lengths)
        if not (replica.verified or location.provider.trust_length):
            raise MigrationError('Only verified datafiles can be migrated'
                                 ' to this destination')

        filename = replica.get_absolute_filepath()
        try:
            newreplica = Replica.objects.get(datafile=replica.datafile,
                                             location=location)
            created_replica = False
            # We've most likely mirrored this file previously.  But if
            # we are about to delete the source Replica, we need to check
            # that the target Replica still verifies.
            if not mirror and \
                    not check_file_transferred(newreplica, location):
                raise MigrationError('Previously mirrored / migrated'
                                     ' Replica no longer verifies'
                                     ' locally!')
        except Replica.DoesNotExist:
            newreplica = Replica()
            newreplica.location = location
            newreplica.datafile = replica.datafile
            newreplica.protocol = ''
            newreplica.stay_remote = \
                location != Location.get_default_location()
            newreplica.verified = False
            url = location.provider.generate_url(newreplica)

            if newreplica.url == url:
                # We shouldn't get here ...
                raise MigrationError('Cannot migrate a replica to its'
                                     ' current location')
            newreplica.url = url
            location.provider.put_file(replica, newreplica)
            verified = False
            try:
                verified = check_file_transferred(newreplica, location)
            except:
                # FIXME - should we always do this?
                location.provider.remove_file(newreplica)
                raise

            newreplica.verified = verified
            newreplica.save()
            logger.info('Transferred file %s for replica %s' %
                        (filename, replica.id))
            created_replica = True

        if mirror:
            return created_replica

        # FIXME - do this more reliably ...
        replica.delete()
        if not noRemove:
            source.provider.remove_file(replica)
            logger.info('Removed local file %s for replica %s' %
                        (filename, replica.id))
        return True
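# A minimal usage sketch (names as in the migration tests above):
#
#     dest = Location.get_location('test')
#     migrate_replica(replica, dest)                 # move to 'test'
#     migrate_replica(replica, dest, mirror=True)    # copy only; the
#                                                    # source Replica is
#                                                    # kept
#     migrate_replica(replica, dest, noRemove=True)  # replace the Replica
#                                                    # record, but leave
#                                                    # the source file on
#                                                    # disk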