def create_datafile(index):
    """Build a Dataset_File fixture backed by a local-file Replica.

    Loads ``fixtures/middleware_test<index>.txt``; every replica except
    the one for index 1 is explicitly flagged as unverified so tests can
    exercise both code paths.
    """
    fixture = path.join(path.dirname(__file__), 'fixtures',
                        'middleware_test%d.txt' % index)
    size, sha512sum = get_size_and_sha512sum(fixture)

    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(fixture),
                            size=size,
                            sha512sum=sha512sum)
    datafile.save()

    location = Location.load_location({
        'name': 'test-middleware',
        'url': 'file://' + path.abspath(path.dirname(fixture)),
        'type': 'external',
        'priority': 10,
        'transfer_provider': 'local',
    })

    replica = Replica(datafile=datafile,
                      url='file://' + path.abspath(fixture),
                      protocol='file',
                      location=location)
    replica.save()

    # Only the first fixture stays verified.
    if index != 1:
        replica.verified = False
        replica.save(update_fields=['verified'])

    return Dataset_File.objects.get(pk=datafile.pk)
def testLocalFile(self):
    """An unverified local replica should be verified by verify_files()."""
    content = urandom(1024)
    cf = ContentFile(content, 'background_task_testfile')

    # Create a new datafile describing the random payload.
    datafile = Dataset_File(dataset=self.dataset)
    datafile.filename = cf.name
    datafile.size = len(content)
    datafile.sha512sum = hashlib.sha512(content).hexdigest()
    datafile.save()

    replica = Replica(
        datafile=datafile,
        url=write_uploaded_file_to_dataset(self.dataset, cf),
        location=Location.get_default_location())
    replica.save()

    def get_replica(df):
        return Replica.objects.get(datafile=df)

    # Undo the automatic verification so the background task has work to do.
    replica.verified = False
    replica.save(update_fields=['verified'])

    # Check that it's not currently verified
    expect(get_replica(datafile).verified).to_be(False)

    # Check it verifies
    verify_files()
    expect(get_replica(datafile).verified).to_be(True)
def generate_datafile(path, dataset, content=None, size=-1,
                      verify=True, verified=True):
    '''Generates a datafile AND a replica to hold its contents.

    :param path: replica url / relative file path, or None to derive one
        from the dataset's first experiment ("staging" layout)
    :param dataset: the Dataset the new datafile belongs to
    :param content: optional bytes to write to disk under FILE_STORE_PATH
    :param size: recorded size when no content is supplied
    :param verify: when True (and content given), run replica.verify()
    :param verified: the verified flag to force when not verifying
    :return: (datafile, replica) tuple
    :raises RuntimeError: if verification was requested and failed
    '''
    from tardis.tardis_portal.models import Dataset_File, Replica, Location
    # Temporarily relax checksum requirements; restored in the finally block.
    saved = settings.REQUIRE_DATAFILE_CHECKSUMS
    settings.REQUIRE_DATAFILE_CHECKSUMS = False
    try:
        datafile = Dataset_File()
        if content:
            datafile.size = str(len(content))
        else:
            datafile.size = str(size)
        # Normally we use any old string for the datafile path, but some
        # tests require the path to be the same as what 'staging' would use
        if path is None:
            datafile.dataset_id = dataset.id
            datafile.save()
            path = "%s/%s/%s" % (dataset.get_first_experiment().id,
                                 dataset.id, datafile.id)
        filepath = os.path.normpath(FILE_STORE_PATH + '/' + path)
        if content:
            try:
                os.makedirs(os.path.dirname(filepath))
                os.remove(filepath)
            except OSError:
                # Best-effort: the directory may already exist and the
                # file may not - either situation is fine.
                pass
            # 'with' guarantees the handle is closed even on write failure
            # (the original leaked it and shadowed the 'file' builtin).
            with open(filepath, 'wb+') as outfile:
                outfile.write(content)
        datafile.mimetype = "application/unspecified"
        datafile.filename = os.path.basename(filepath)
        datafile.dataset_id = dataset.id
        datafile.save()
        location = _infer_location(path)
        replica = Replica(datafile=datafile, url=path, protocol='',
                          location=location)
        if verify and content:
            if not replica.verify(allowEmptyChecksums=True):
                raise RuntimeError('verify failed!?!')
        else:
            replica.verified = verified
        replica.save()
        return (datafile, replica)
    finally:
        settings.REQUIRE_DATAFILE_CHECKSUMS = saved
def generate_datafile(path, dataset, content=None, size=-1,
                      verify=True, verified=True,
                      verify_checksums_req=False):
    '''Generates a datafile AND a replica to hold its contents.

    :param path: replica url / relative file path, or None to derive one
        from the dataset's first experiment ("staging" layout)
    :param dataset: the Dataset the new datafile belongs to
    :param content: optional bytes to write under settings.FILE_STORE_PATH
    :param size: recorded size when no content is supplied
    :param verify: when True (and content given), run replica.verify()
    :param verified: the verified flag forced onto the replica afterwards
    :param verify_checksums_req: value of REQUIRE_DATAFILE_CHECKSUMS during
        the verify() call
    :return: (datafile, replica) tuple
    :raises RuntimeError: if verification was requested and failed
    '''
    from tardis.tardis_portal.models import Dataset_File, Replica, Location
    # Temporarily relax checksum requirements; restored in the finally block.
    saved = settings.REQUIRE_DATAFILE_CHECKSUMS
    settings.REQUIRE_DATAFILE_CHECKSUMS = False
    try:
        datafile = Dataset_File()
        if content:
            datafile.size = str(len(content))
        else:
            datafile.size = str(size)
        # Normally we use any old string for the datafile path, but some
        # tests require the path to be the same as what 'staging' would use
        if path is None:
            datafile.dataset_id = dataset.id
            datafile.save()
            path = "%s/%s/%s" % (dataset.get_first_experiment().id,
                                 dataset.id, datafile.id)
        filepath = os.path.normpath(settings.FILE_STORE_PATH + '/' + path)
        if content:
            try:
                os.makedirs(os.path.dirname(filepath))
                os.remove(filepath)
            except OSError:
                # Best-effort: the directory may already exist and the
                # file may not - either situation is fine.
                pass
            # 'with' guarantees the handle is closed even on write failure.
            with open(filepath, 'wb+') as gen_file:
                gen_file.write(content)
        datafile.mimetype = "application/unspecified"
        datafile.filename = os.path.basename(filepath)
        datafile.dataset_id = dataset.id
        datafile.save()
        settings.REQUIRE_DATAFILE_CHECKSUMS = verify_checksums_req
        location = _infer_location(path)
        replica = Replica(datafile=datafile, url=path, protocol='',
                          location=location)
        if verify and content:
            if not replica.verify():
                raise RuntimeError('verify failed!?!')
        replica.save()
        replica.verified = verified
        replica.save(update_fields=['verified'])  # force no verification
        return (datafile, replica)
    finally:
        settings.REQUIRE_DATAFILE_CHECKSUMS = saved
def create_datafile(index):
    """Create a Dataset_File plus a local-file Replica for a fixture file.

    Every index other than 1 gets its replica marked unverified.
    """
    testfile = path.join(path.dirname(__file__), 'fixtures',
                         'middleware_test%d.txt' % index)
    size, sha512sum = get_size_and_sha512sum(testfile)

    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(testfile),
                            size=size,
                            sha512sum=sha512sum)
    datafile.save()

    location_spec = {
        'name': 'test-middleware',
        'url': 'file://' + path.abspath(path.dirname(testfile)),
        'type': 'external',
        'priority': 10,
        'transfer_provider': 'local',
    }
    location = Location.load_location(location_spec)

    replica = Replica(datafile=datafile,
                      url='file://' + path.abspath(testfile),
                      protocol='file',
                      location=location)
    replica.save()

    if index != 1:
        # Leave everything except the first file unverified.
        replica.verified = False
        replica.save(update_fields=['verified'])

    return Dataset_File.objects.get(pk=datafile.pk)
def migrate_replica(replica, location, noRemove=False, mirror=False):
    """
    Migrate the replica to a different storage location.  The overall
    effect will be that the datafile will be stored at the new location and
    removed from the current location, and the datafile metadata will be
    updated to reflect this.

    :param replica: the Replica to migrate / mirror
    :param location: the destination Location
    :param noRemove: when True, the source file is kept on disk
    :param mirror: when True, only copy the replica; the source Replica is
        kept and the return value reports whether a new Replica was created
    :return: True on success; when mirroring, whether a new Replica was made
    :raises MigrationError: if the replica cannot be migrated / mirrored
    """
    from tardis.tardis_portal.models import Replica, Location

    with transaction.commit_on_success():
        # Lock the replica row so concurrent migrations cannot race.
        replica = Replica.objects.select_for_update().get(pk=replica.pk)
        source = Location.get_location(replica.location.name)

        # BUG FIX: the original test was
        #     `not replica.verified or location.provider.trust_length`
        # which raised for every length-trusting destination, even with a
        # verified replica.  The intent is to require either a verified
        # replica or a destination whose provider trusts the file length.
        if not (replica.verified or location.provider.trust_length):
            raise MigrationError('Only verified datafiles can be migrated' \
                                     ' to this destination')

        filename = replica.get_absolute_filepath()
        try:
            newreplica = Replica.objects.get(datafile=replica.datafile,
                                             location=location)
            created_replica = False
            # We've most likely mirrored this file previously.  But if
            # we are about to delete the source Replica, we need to check
            # that the target Replica still verifies.
            if not mirror and not check_file_transferred(newreplica,
                                                         location):
                raise MigrationError('Previously mirrored / migrated' \
                                         ' Replica no longer verifies' \
                                         ' locally!')
        except Replica.DoesNotExist:
            newreplica = Replica()
            newreplica.location = location
            newreplica.datafile = replica.datafile
            newreplica.protocol = ''
            newreplica.stay_remote = \
                location != Location.get_default_location()
            newreplica.verified = False
            url = location.provider.generate_url(newreplica)

            if newreplica.url == url:
                # We should never get here ...
                raise MigrationError('Cannot migrate a replica to its' \
                                         ' current location')
            newreplica.url = url
            location.provider.put_file(replica, newreplica)

            verified = False
            try:
                verified = check_file_transferred(newreplica, location)
            except:
                # FIXME - should we always do this?
                # Bare except is deliberate: clean up the half-transferred
                # file, then re-raise whatever went wrong.
                location.provider.remove_file(newreplica)
                raise

            newreplica.verified = verified
            newreplica.save()
            logger.info('Transferred file %s for replica %s' %
                        (filename, replica.id))
            created_replica = True

        if mirror:
            return created_replica

        # FIXME - do this more reliably ...
        replica.delete()
        if not noRemove:
            source.provider.remove_file(replica)
            logger.info('Removed local file %s for replica %s' %
                        (filename, replica.id))
        return True
def migrate_replica(replica, location, noRemove=False, mirror=False):
    """
    Migrate the replica to a different storage location.  The overall
    effect will be that the datafile will be stored at the new location and
    removed from the current location, and the datafile metadata will be
    updated to reflect this.

    :param replica: the Replica to migrate / mirror
    :param location: the destination Location
    :param noRemove: when True, the source file is kept on disk
    :param mirror: when True, only copy the replica; the source Replica is
        kept and the return value reports whether a new Replica was created
    :return: True on success; when mirroring, whether a new Replica was made
    :raises MigrationError: if the replica cannot be migrated / mirrored
    """
    from tardis.tardis_portal.models import Replica, Location

    with transaction.commit_on_success():
        # Lock the replica row so concurrent migrations cannot race.
        replica = Replica.objects.select_for_update().get(pk=replica.pk)
        source = Location.get_location(replica.location.name)

        # BUG FIX: the original test was
        #     `not replica.verified or location.provider.trust_length`
        # which raised for every length-trusting destination, even with a
        # verified replica.  The intent is to require either a verified
        # replica or a destination whose provider trusts the file length.
        if not (replica.verified or location.provider.trust_length):
            raise MigrationError('Only verified datafiles can be migrated' \
                                     ' to this destination')

        filename = replica.get_absolute_filepath()
        try:
            newreplica = Replica.objects.get(datafile=replica.datafile,
                                             location=location)
            created_replica = False
            # We've most likely mirrored this file previously.  But if
            # we are about to delete the source Replica, we need to check
            # that the target Replica still verifies.
            if not mirror and not check_file_transferred(newreplica,
                                                         location):
                raise MigrationError('Previously mirrored / migrated' \
                                         ' Replica no longer verifies' \
                                         ' locally!')
        except Replica.DoesNotExist:
            newreplica = Replica()
            newreplica.location = location
            newreplica.datafile = replica.datafile
            newreplica.protocol = ''
            newreplica.stay_remote = \
                location != Location.get_default_location()
            newreplica.verified = False
            url = location.provider.generate_url(newreplica)

            if newreplica.url == url:
                # We should never get here ...
                raise MigrationError('Cannot migrate a replica to its' \
                                         ' current location')
            newreplica.url = url
            location.provider.put_file(replica, newreplica)

            verified = False
            try:
                verified = check_file_transferred(newreplica, location)
            except:
                # FIXME - should we always do this?
                # Bare except is deliberate: clean up the half-transferred
                # file, then re-raise whatever went wrong.
                location.provider.remove_file(newreplica)
                raise

            newreplica.verified = verified
            newreplica.save()
            logger.info('Transferred file %s for replica %s' %
                        (filename, replica.id))
            created_replica = True

        if mirror:
            return created_replica

        # FIXME - do this more reliably ...
        replica.delete()
        if not noRemove:
            source.provider.remove_file(replica)
            logger.info('Removed local file %s for replica %s' %
                        (filename, replica.id))
        return True