def create_datafile(index):
    testfile = path.join(path.dirname(__file__), 'fixtures',
                         'jeol_sem_test%d.txt' % index)
    size, sha512sum = get_size_and_sha512sum(testfile)

    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(testfile),
                            size=size,
                            sha512sum=sha512sum)
    datafile.save()

    base_url = 'file://' + path.abspath(path.dirname(testfile))
    location = Location.load_location({
        'name': 'test-jeol',
        'url': base_url,
        'type': 'external',
        'priority': 10,
        'transfer_provider': 'local'})
    replica = Replica(datafile=datafile,
                      url='file://' + path.abspath(testfile),
                      protocol='file',
                      location=location)
    replica.verify()
    replica.save()
    return Dataset_File.objects.get(pk=datafile.pk)
def clone(cls, oldInstance, newDescription, username):
    newInstance = cls(description=newDescription,
                      experiment_id=oldInstance.dataset.experiment.id)
    # Copy all parameters except those explicitly excluded.
    for param in oldInstance.parameters:
        if param.name.name not in cls.doNotCopyParams:
            if param.name.isNumeric():
                value = param.numerical_value
            else:
                value = param.string_value
            newInstance.new_param(param.name.name, value)

    import os
    import shutil
    # Copy every uploaded file except job files into the user's staging
    # area and register each copy as a new staged datafile.
    for filename in oldInstance.get_params("uploaded_file", value=True):
        if filename[-8:] != ".jobfile":
            thisfile = Dataset_File.objects.get(
                dataset=oldInstance.dataset,
                filename=filename)
            shutil.copy(thisfile.get_absolute_filepath(),
                        get_full_staging_path(username))
            newfileurl = os.path.join(get_full_staging_path(username),
                                      filename)
            newDatafile = Dataset_File(
                dataset=newInstance.dataset,
                url=newfileurl,
                protocol="staging",
                mimetype=thisfile.mimetype,
            )
            newDatafile.save()
    return newInstance
def add_datafile_to_dataset(dataset, filepath, size):
    """Adds datafile metadata to a dataset.

    :param dataset: the dataset whose directory is to be written to
    :type dataset: :class:`tardis.tardis_portal.models.Dataset`
    :param filepath: the full OS path to the file
    :type filepath: string
    :param size: the file size in bytes
    :type size: string
    :returns: the new datafile object
    """
    from tardis.tardis_portal.models import Dataset_File

    experiment_path = path.join(settings.FILE_STORE_PATH,
                                str(dataset.experiment.id))
    dataset_path = path.join(experiment_path, str(dataset.id))
    urlpath = 'tardis:/' + filepath[len(dataset_path):]
    filename = urlpath.rpartition('/')[2]

    datafile = Dataset_File(dataset=dataset, filename=filename,
                            url=urlpath, size=size, protocol='tardis')
    datafile.save()
    return datafile
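# A minimal usage sketch for add_datafile_to_dataset() above, assuming
# a saved Dataset whose files already sit under
# FILE_STORE_PATH/<experiment_id>/<dataset_id>; the `dataset` variable
# and the directory walk are hypothetical, not part of the original code.
import os
from os import path
from django.conf import settings

dataset_dir = path.join(settings.FILE_STORE_PATH,
                        str(dataset.experiment.id), str(dataset.id))
for name in os.listdir(dataset_dir):
    filepath = path.join(dataset_dir, name)
    if path.isfile(filepath):
        add_datafile_to_dataset(dataset, filepath,
                                str(path.getsize(filepath)))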
def setUp(self):
    # create a test user
    self.user = User.objects.create_user(username='******',
                                         email='',
                                         password='******')

    # create a public experiment
    self.experiment1 = Experiment(title='Experiment 1',
                                  created_by=self.user,
                                  public=True)
    self.experiment1.save()

    # create a non-public experiment
    self.experiment2 = Experiment(title='Experiment 2',
                                  created_by=self.user,
                                  public=False)
    self.experiment2.save()

    # dataset1 belongs to experiment1
    self.dataset1 = Dataset(experiment=self.experiment1)
    self.dataset1.save()

    # dataset2 belongs to experiment2
    self.dataset2 = Dataset(experiment=self.experiment2)
    self.dataset2.save()

    # absolute path first
    filename = 'testfile.txt'
    self.dest1 = abspath(join(settings.FILE_STORE_PATH,
                              '%s/%s/' % (self.experiment1.id,
                                          self.dataset1.id)))
    self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                              '%s/%s/' % (self.experiment2.id,
                                          self.dataset2.id)))
    if not exists(self.dest1):
        makedirs(self.dest1)
    if not exists(self.dest2):
        makedirs(self.dest2)

    testfile1 = abspath(join(self.dest1, filename))
    f = open(testfile1, 'w')
    f.write("Hello World!\n")
    f.close()

    testfile2 = abspath(join(self.dest2, filename))
    f = open(testfile2, 'w')
    f.write("Hello World!\n")
    f.close()

    self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                      filename=filename,
                                      protocol='tardis',
                                      url='tardis://%s' % filename)
    self.dataset_file1.save()
    self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                      filename=basename(filename),
                                      protocol='tardis',
                                      url='tardis://%s' % filename)
    self.dataset_file2.save()
def add_datafile_to_dataset(dataset, filepath, size):
    """Adds datafile metadata to a dataset.

    :param dataset: the dataset whose directory is to be written to
    :type dataset: :class:`tardis.tardis_portal.models.Dataset`
    :param filepath: the full OS path to the file
    :type filepath: string
    :param size: the file size in bytes
    :type size: string
    :returns: the new datafile object
    """
    experiment_path = path.join(settings.FILE_STORE_PATH,
                                str(dataset.experiment.id))
    dataset_path = path.join(experiment_path, str(dataset.id))
    urlpath = 'file:/' + filepath[len(experiment_path):]
    filename = urlpath.rpartition('/')[2]

    datafile = Dataset_File(dataset=dataset, filename=filename,
                            url=urlpath, size=size, protocol='')
    datafile.save()
    return datafile
def _make_dataset(self, exp, filenames):
    dataset = Dataset(experiment=exp)
    dataset.save()
    for filename in filenames:
        df = Dataset_File(dataset=dataset, size=41, protocol='file')
        df.filename = filename
        df.url = 'file://' + path.join(path.dirname(__file__),
                                       'data', df.filename)
        df.save()
def setUp(self):
    # create a test user
    self.user = User.objects.create_user(username='******',
                                         email='',
                                         password='******')

    # create a public experiment
    self.experiment1 = Experiment(title='Experiment 1',
                                  created_by=self.user,
                                  public=True)
    self.experiment1.save()

    # create a non-public experiment
    self.experiment2 = Experiment(title='Experiment 2',
                                  created_by=self.user,
                                  public=False)
    self.experiment2.save()

    # dataset1 belongs to experiment1
    self.dataset1 = Dataset(experiment=self.experiment1)
    self.dataset1.save()

    # dataset2 belongs to experiment2
    self.dataset2 = Dataset(experiment=self.experiment2)
    self.dataset2.save()

    # absolute path first
    filename = 'testfile.txt'
    self.dest1 = abspath(join(settings.FILE_STORE_PATH,
                              '%s' % self.experiment1.id))
    self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                              '%s' % self.experiment2.id))
    if not exists(self.dest1):
        mkdir(self.dest1)
    if not exists(self.dest2):
        mkdir(self.dest2)

    testfile1 = abspath(join(self.dest1, filename))
    f = open(testfile1, 'w')
    f.write("Hello World!\n")
    f.close()

    testfile2 = abspath(join(self.dest2, filename))
    f = open(testfile2, 'w')
    f.write("Hello World!\n")
    f.close()

    self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                      filename=filename,
                                      protocol='tardis',
                                      url='tardis://%s' % filename)
    self.dataset_file1.save()
    self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                      filename=basename(filename),
                                      protocol='tardis',
                                      url='tardis://%s' % filename)
    self.dataset_file2.save()
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(0, nosDatafiles):
        df_ = Dataset_File(dataset=ds_,
                           url='http://planet-python.org/' + str(_next_id()))
        df_.save()
    ds_.save()
    return ds_
def _build(dataset, filename, url, protocol):
    from tardis.tardis_portal.models import \
        Dataset_File, Replica, Location
    datafile = Dataset_File(dataset=dataset, filename=filename)
    datafile.save()
    replica = Replica(datafile=datafile,
                      url=url,
                      protocol=protocol,
                      location=Location.get_default_location())
    replica.save()
    return datafile
def add_staged_file_to_dataset(rel_filepath, dataset_id, username,
                               mimetype="application/octet-stream"):
    """Add a file in the user's staging path to a dataset.

    May be replaced by main code functions. Quick and dirty hack
    to get it working.
    """
    originfilepath = os.path.join(get_full_staging_path(username),
                                  rel_filepath)
    dataset = Dataset.objects.get(pk=dataset_id)

    newDatafile = Dataset_File()
    newDatafile.dataset = dataset
    newDatafile.size = os.path.getsize(originfilepath)
    newDatafile.protocol = "tardis"
    newDatafile.mimetype = mimetype

    file_dir = "/" + str(dataset.experiment.id) + "/" + str(dataset.id) + "/"
    file_path = file_dir + rel_filepath
    prelim_full_file_path = settings.FILE_STORE_PATH + file_path
    full_file_path = duplicate_file_check_rename(prelim_full_file_path)
    newDatafile.filename = os.path.basename(full_file_path)
    newDatafile.url = "%s://%s" % (
        newDatafile.protocol,
        full_file_path[len(settings.FILE_STORE_PATH) + len(file_dir):])

    if not os.path.exists(os.path.dirname(full_file_path)):
        os.makedirs(os.path.dirname(full_file_path))
    shutil.move(originfilepath, full_file_path)
    newDatafile.save()
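# A minimal usage sketch for add_staged_file_to_dataset() above; the
# relative path, dataset id and username are made-up values, not taken
# from the original code.
add_staged_file_to_dataset('run1/output.txt', 42, 'alice',
                           mimetype='text/plain')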
def _create_test_dataset(nosDatafiles):
    ds_ = Dataset(description='happy snaps of plumage')
    ds_.save()
    for i in range(0, nosDatafiles):
        df_ = Dataset_File(dataset=ds_, size='21', sha512sum='bogus')
        df_.save()
        rep_ = Replica(datafile=df_,
                       url='http://planet-python.org/' + str(_next_id()),
                       location=Location.get_default_location())
        rep_.save()
    ds_.save()
    return ds_
def testRemoteFile(self):
    content = urandom(1024)
    with NamedTemporaryFile() as f:
        # Create new Datafile
        datafile = Dataset_File(dataset=self.dataset)
        datafile.filename = 'background_task_testfile'
        datafile.size = len(content)
        datafile.sha512sum = hashlib.sha512(content).hexdigest()
        datafile.url = 'file://' + path.abspath(f.name)
        datafile.save()

        def get_datafile(datafile):
            return Dataset_File.objects.get(id=datafile.id)

        # Check that it won't verify as it stands
        expect(get_datafile(datafile).verified).to_be(False)
        verify_files()
        expect(get_datafile(datafile).verified).to_be(False)
        expect(get_datafile(datafile).is_local()).to_be(False)

        # Fill in the content
        f.write(content)
        f.flush()

        # Check it now verifies
        verify_files()
        expect(get_datafile(datafile).verified).to_be(True)
        expect(get_datafile(datafile).is_local()).to_be(True)
def create_staging_datafile(filepath, username, dataset_id):
    from tardis.tardis_portal.models import Dataset_File, Dataset, Replica, \
        Location
    dataset = Dataset.objects.get(id=dataset_id)

    url, size = get_staging_url_and_size(username, filepath)
    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(filepath),
                            size=size)
    replica = Replica(datafile=datafile,
                      protocol='staging',
                      url=url,
                      location=Location.get_location('staging'))
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
def process_enclosure(self, dataset, enclosure):
    filename = getattr(enclosure, 'title', basename(enclosure.href))
    datafile = Dataset_File(filename=filename, dataset=dataset)
    try:
        datafile.mimetype = enclosure.mime
    except AttributeError:
        pass
    try:
        datafile.size = enclosure.length
    except AttributeError:
        pass
    try:
        hash = enclosure.hash
        # Split on white space, then ':' to get tuples to feed into dict
        hashdict = dict([s.partition(':')[::2] for s in hash.split()])
        # Set SHA-512 sum
        datafile.sha512sum = hashdict['sha-512']
    except AttributeError:
        pass
    datafile.save()

    url = enclosure.href
    # This means we will allow the atom feed to feed us any enclosure
    # URL that matches a registered location.  Maybe we should restrict
    # this to a specific location.
    location = Location.get_location_for_url(url)
    if not location:
        logger.error('Rejected ingestion for unknown location %s' % url)
        return

    replica = Replica(datafile=datafile, url=url, location=location)
    replica.protocol = enclosure.href.partition('://')[0]
    replica.save()
    self.make_local_copy(replica)
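# A standalone sketch of the hash-string parsing used in
# process_enclosure() above; the input value is made up for
# illustration, not taken from a real feed.
hash_string = 'md5:d41d8cd98f00b204e9800998ecf8427e sha-512:deadbeef'
# Split on whitespace, then partition each piece on ':' -- the [::2]
# slice keeps the (algorithm, digest) pair and drops the ':' itself.
hashdict = dict([s.partition(':')[::2] for s in hash_string.split()])
assert hashdict['sha-512'] == 'deadbeef'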
def aadd_staged_file_to_dataset(rel_filepath, dataset_id, username,
                                mimetype="application/octet-stream"):
    """Add a file in the user's staging path to a dataset.

    May be replaced by main code functions. Quick and dirty hack
    to get it working.
    """
    originfilepath = os.path.join(get_full_staging_path(username),
                                  rel_filepath)
    dataset = Dataset.objects.get(pk=dataset_id)
    newDatafile = Dataset_File(
        dataset=dataset,
        url=originfilepath,
        protocol="staging",
        mimetype=mimetype,
    )
    newDatafile.save()
def test_hrmc_filter(self):
    """Make an experiment, load up the grexp file and check that the
    dataset schema is missing, then load up grfinal and check that the
    dataset schema is created.
    """
    user = _create_test_user()
    license = _create_license()
    exp = _create_test_experiment(user, license)
    ds = Dataset(description='happy snaps of plumage')
    ds.save()
    _create_test_dataset(ds, exp.id, {"output.dat": 'hello',
                                      "grexp.dat": '2 5\n6 15\n'})
    ds.experiments.add(exp)
    ds.save()

    sch = Schema(namespace=self.HRMCSCHEMA,
                 name="hrmc_views",
                 type=Schema.DATASET)
    sch.save()
    param = ParameterName(schema=sch,
                          name="plot",
                          full_name="scatterplot",
                          units="image",
                          data_type=ParameterName.FILENAME)
    param.save()

    param_sets = get_param_sets(ds)
    self.assertEquals(list(param_sets), [])

    _create_test_dataset(ds, exp.id, {'grfinal21.dat': "1 3\n5 14\n"})
    df2 = Dataset_File(dataset=ds, url='path/grfinal21.dat')
    df2.save()
    h = hrmc.HRMCOutput('HRMC', self.HRMCSCHEMA)
    h(sender=Dataset_File, instance=df2)

    param_sets = get_param_sets(ds)
    self.assertEquals([x.schema.namespace for x in param_sets],
                      [self.HRMCSCHEMA])
def _create_datafile():
    user = User.objects.create_user('testuser', '*****@*****.**', 'pwd')
    user.save()
    UserProfile(user=user).save()

    Location.force_initialize()

    full_access = Experiment.PUBLIC_ACCESS_FULL
    experiment = Experiment.objects.create(title="IIIF Test",
                                           created_by=user,
                                           public_access=full_access)
    experiment.save()
    ObjectACL(content_object=experiment,
              pluginId='django_user',
              entityId=str(user.id),
              isOwner=True,
              canRead=True,
              canWrite=True,
              canDelete=True,
              aclOwnershipType=ObjectACL.OWNER_OWNED).save()

    dataset = Dataset()
    dataset.save()
    dataset.experiments.add(experiment)
    dataset.save()

    # Create new Datafile
    tempfile = TemporaryUploadedFile('iiif_stored_file', None, None, None)
    with Image(filename='magick:rose') as img:
        img.format = 'tiff'
        img.save(file=tempfile.file)
        tempfile.file.flush()
    datafile = Dataset_File(dataset=dataset,
                            size=os.path.getsize(tempfile.file.name),
                            filename='iiif_named_file')
    replica = Replica(datafile=datafile,
                      url=write_uploaded_file_to_dataset(dataset, tempfile),
                      location=Location.get_default_location())
    replica.verify(allowEmptyChecksums=True)
    datafile.save()
    replica.datafile = datafile
    replica.save()
    return datafile
def fpupload(request, dataset_id):
    """Uploads all files picked by filepicker to the dataset.

    :param request: an HTTP Request instance
    :type request: :class:`django.http.HttpRequest`
    :param dataset_id: the dataset_id
    :type dataset_id: integer
    :returns: a JSON response with ``{"result": true}`` on success
    :rtype: :class:`django.http.HttpResponse`
    """
    dataset = Dataset.objects.get(id=dataset_id)
    logger.debug('called fpupload')

    if request.method == 'POST':
        logger.debug('got POST')
        for key, val in request.POST.items():
            splits = val.split(",")
            for url in splits:
                try:
                    fp = FilepickerFile(url)
                except ValueError:
                    pass
                else:
                    picked_file = fp.get_file()
                    filepath = write_uploaded_file_to_dataset(dataset,
                                                              picked_file)
                    datafile = Dataset_File(dataset=dataset,
                                            filename=picked_file.name,
                                            size=picked_file.size)
                    replica = Replica(
                        datafile=datafile,
                        url=filepath,
                        protocol='',
                        location=Location.get_default_location())
                    replica.verify(allowEmptyChecksums=True)
                    datafile.save()
                    replica.datafile = datafile
                    replica.save()
        return HttpResponse(json.dumps({"result": True}))
def _build_datafile(self, testfile, filename, dataset, url,
                    protocol='', checksum=None, size=None, mimetype=''):
    filesize, sha512sum = get_size_and_sha512sum(testfile)
    datafile = Dataset_File(
        dataset=dataset,
        filename=filename,
        mimetype=mimetype,
        size=str(size if size is not None else filesize),
        sha512sum=(checksum if checksum else sha512sum))
    datafile.save()

    if urlparse.urlparse(url).scheme == '':
        location = Location.get_location('local')
    else:
        location = Location.get_location_for_url(url)
        if not location:
            location = Location.load_location({
                'name': filename,
                'url': urlparse.urljoin(url, '.'),
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local'})
    replica = Replica(datafile=datafile,
                      protocol=protocol,
                      url=url,
                      location=location)
    replica.verify()
    replica.save()
    return Dataset_File.objects.get(pk=datafile.pk)
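# For reference, a standalone check of how _build_datafile() above
# derives the fallback Location's base URL; the file URL is a made-up
# example.  urljoin(url, '.') strips the last path segment, turning a
# file URL into its parent directory URL.
import urlparse  # Python 2 stdlib module, as used above

file_url = 'http://example.test/store/run1/data.txt'
assert urlparse.urljoin(file_url, '.') == 'http://example.test/store/run1/'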
def create_datafile(file_path):
    testfile = path.join(path.dirname(__file__), 'fixtures', file_path)
    size, sha512sum = get_size_and_sha512sum(testfile)

    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(testfile),
                            size=size,
                            sha512sum=sha512sum)
    datafile.save()

    base_url = 'file://' + path.abspath(path.dirname(testfile))
    location = Location.load_location({
        'name': 'test-flexstation',
        'url': base_url,
        'type': 'external',
        'priority': 10,
        'transfer_provider': 'local'})
    replica = Replica(datafile=datafile,
                      url='file://' + path.abspath(testfile),
                      protocol='file',
                      location=location)
    replica.verify()
    replica.save()
    return Dataset_File.objects.get(pk=datafile.pk)
def testLocalFile(self):
    content = urandom(1024)
    cf = ContentFile(content, 'background_task_testfile')

    # Create new Datafile
    datafile = Dataset_File(dataset=self.dataset)
    datafile.filename = cf.name
    datafile.size = len(content)
    datafile.sha512sum = hashlib.sha512(content).hexdigest()
    datafile.save()
    replica = Replica(datafile=datafile,
                      url=write_uploaded_file_to_dataset(self.dataset, cf),
                      location=Location.get_default_location())
    replica.save()

    def get_replica(datafile):
        return Replica.objects.get(datafile=datafile)

    # undo auto-verify:
    replica.verified = False
    replica.save(update_fields=['verified'])

    # Check that it's not currently verified
    expect(get_replica(datafile).verified).to_be(False)

    # Check it verifies
    verify_files()
    expect(get_replica(datafile).verified).to_be(True)
def setUp(self):
    """setting up essential objects, copied from tests above"""
    Location.force_initialize()
    self.location = Location.get_location('local')

    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)
    self.userProfile = UserProfile(user=self.user).save()
    self.exp = Experiment(title='test exp1',
                          institution_name='monash',
                          created_by=self.user)
    self.exp.save()
    self.acl = ObjectACL(
        pluginId=django_user,
        entityId=str(self.user.id),
        content_object=self.exp,
        canRead=True,
        isOwner=True,
        aclOwnershipType=ObjectACL.OWNER_OWNED,
    )
    self.acl.save()
    self.dataset = Dataset(description='dataset description...')
    self.dataset.save()
    self.dataset.experiments.add(self.exp)
    self.dataset.save()

    self.dataset_file = Dataset_File(dataset=self.dataset,
                                     size=42,
                                     filename="foo",
                                     md5sum="junk")
    self.dataset_file.save()

    self.replica = Replica(datafile=self.dataset_file,
                           url="http://foo",
                           location=self.location,
                           verified=False)
    self.replica.save()
def create_datafile(index):
    testfile = path.join(path.dirname(__file__), 'fixtures',
                         'middleware_test%d.txt' % index)
    size, sha512sum = get_size_and_sha512sum(testfile)

    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(testfile),
                            size=size,
                            sha512sum=sha512sum)
    datafile.save()

    base_url = 'file://' + path.abspath(path.dirname(testfile))
    location = Location.load_location({
        'name': 'test-middleware',
        'url': base_url,
        'type': 'external',
        'priority': 10,
        'transfer_provider': 'local'})
    replica = Replica(datafile=datafile,
                      url='file://' + path.abspath(testfile),
                      protocol='file',
                      location=location)
    replica.save()
    if index != 1:
        replica.verified = False
        replica.save(update_fields=['verified'])
    return Dataset_File.objects.get(pk=datafile.pk)
def _create_test_dataset(ds, exp_id, fnames):
    for fname, contents in fnames.items():
        dest = os.path.abspath(os.path.join(settings.FILE_STORE_PATH,
                                            '%s/%s/' % (exp_id, ds.id)))
        if not os.path.exists(dest):
            os.makedirs(dest)
        testfile = os.path.abspath(os.path.join(dest, fname))
        with open(testfile, "w+b") as f:
            f.write(contents)
        size, sha512sum = get_size_and_sha512sum(testfile)
        dataset_file = Dataset_File(dataset=ds,
                                    filename=fname,
                                    protocol='',
                                    size=size,
                                    sha512sum=sha512sum,
                                    url='%d/%d/%s' % (exp_id, ds.id, fname))
        dataset_file.verify()
        dataset_file.save()
    return ds
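# A small illustration of the URL convention in _create_test_dataset()
# above (the ids are made up): the stored url is a path relative to
# settings.FILE_STORE_PATH rather than an absolute filesystem path.
assert '%d/%d/%s' % (3, 7, 'out.dat') == '3/7/out.dat'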
def testLocalFile(self):
    content = urandom(1024)
    cf = ContentFile(content, 'background_task_testfile')

    # Create new Datafile
    datafile = Dataset_File(dataset=self.dataset)
    datafile.filename = cf.name
    datafile.size = len(content)
    datafile.sha512sum = hashlib.sha512(content).hexdigest()
    datafile.url = write_uploaded_file_to_dataset(self.dataset, cf)
    datafile.save()

    def get_datafile(datafile):
        return Dataset_File.objects.get(id=datafile.id)

    # Check that it's not currently verified
    expect(get_datafile(datafile).verified).to_be(False)

    # Check it verifies
    verify_files()
    expect(get_datafile(datafile).verified).to_be(True)
def create_datafile(index):
    testfile = path.join(path.dirname(__file__), 'fixtures',
                         'jeol_sem_test%d.txt' % index)
    size, sha512sum = get_size_and_sha512sum(testfile)
    datafile = Dataset_File(dataset=dataset,
                            filename=path.basename(testfile),
                            url='file://' + path.abspath(testfile),
                            protocol='file',
                            size=size,
                            sha512sum=sha512sum)
    datafile.verify()
    datafile.save()
    return datafile
def testRemoteFile(self):
    content = urandom(1024)
    with NamedTemporaryFile() as f:
        # Create new Datafile
        datafile = Dataset_File(dataset=self.dataset)
        datafile.filename = 'background_task_testfile'
        datafile.size = len(content)
        datafile.sha512sum = hashlib.sha512(content).hexdigest()
        datafile.save()
        url = 'file://' + path.abspath(f.name)
        base_url = 'file://' + path.dirname(path.abspath(f.name))
        location = self._get_or_create_local_location(
            'test-staging-xxx', base_url, 'external', 10)
        replica = Replica(datafile=datafile, location=location, url=url)
        replica.save()

        def get_replica(replica):
            try:
                return Replica.objects.get(id=replica.id)
            except Replica.DoesNotExist:
                return None

        def get_new_replica(datafile):
            location = Location.get_default_location()
            return Replica.objects.get(datafile=datafile.id,
                                       location=location)

        # Check that it won't verify as it stands
        expect(get_replica(replica).verified).to_be(False)
        verify_files()
        expect(get_replica(replica).verified).to_be(False)
        expect(get_replica(replica).is_local()).to_be(False)

        # Fill in the content
        f.write(content)
        f.flush()

        # Check it now verifies
        verify_files()
        expect(get_replica(replica).id).to_be(
            get_new_replica(datafile).id)
        expect(get_new_replica(datafile).verified).to_be(True)
        expect(get_new_replica(datafile).is_local()).to_be(True)
def process_enclosure(self, dataset, enclosure):
    filename = getattr(enclosure, 'title', basename(enclosure.href))
    datafile = Dataset_File(url=enclosure.href,
                            filename=filename,
                            dataset=dataset)
    datafile.protocol = enclosure.href.partition('://')[0]
    try:
        datafile.mimetype = enclosure.mime
    except AttributeError:
        pass
    try:
        datafile.size = enclosure.length
    except AttributeError:
        pass
    try:
        hash = enclosure.hash
        # Split on white space, then ':' to get tuples to feed into dict
        hashdict = dict([s.partition(':')[::2] for s in hash.split()])
        # Set SHA-512 sum
        datafile.sha512sum = hashdict['sha-512']
    except AttributeError:
        pass
    datafile.save()
    self.make_local_copy(datafile)
class DownloadTestCase(TestCase):

    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(
            title='Experiment 1',
            created_by=self.user,
            public_access=Experiment.PUBLIC_ACCESS_FULL)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(
            title='Experiment 2',
            created_by=self.user,
            public_access=Experiment.PUBLIC_ACCESS_NONE)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset()
        self.dataset1.save()
        self.dataset1.experiments.add(self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset()
        self.dataset2.save()
        self.dataset2.experiments.add(self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename1 = 'testfile.txt'
        filename2 = 'testfile.tiff'
        self.dest1 = abspath(join(settings.FILE_STORE_PATH,
                                  '%s/%s/' % (self.experiment1.id,
                                              self.dataset1.id)))
        self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                                  '%s/%s/' % (self.experiment2.id,
                                              self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename1))
        f = open(testfile1, 'w')
        f.write("Hello World!\n")
        f.close()

        testfile2 = abspath(join(self.dest2, filename2))
        _generate_test_image(testfile2)

        size, sha512sum = get_size_and_sha512sum(testfile1)
        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename1,
                                          protocol='',
                                          size=size,
                                          sha512sum=sha512sum,
                                          url='%d/%d/%s' %
                                              (self.experiment1.id,
                                               self.dataset1.id,
                                               filename1))
        self.dataset_file1.verify()
        self.dataset_file1.save()

        size, sha512sum = get_size_and_sha512sum(testfile2)
        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename2),
                                          protocol='',
                                          size=size,
                                          sha512sum=sha512sum,
                                          url='%d/%d/%s' %
                                              (self.experiment2.id,
                                               self.dataset2.id,
                                               filename2))
        self.dataset_file2.verify()
        self.dataset_file2.save()

    def tearDown(self):
        self.user.delete()
        self.experiment1.delete()
        self.experiment2.delete()
        rmtree(self.dest1)
        rmtree(self.dest2)

    def testView(self):
        client = Client()

        # check view of file1
        response = client.get('/datafile/view/%i/' % self.dataset_file1.id)
        self.assertEqual(response['Content-Disposition'],
                         'inline; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # check view of file2
        response = client.get('/datafile/view/%i/' % self.dataset_file2.id)
        # Should be forbidden
        self.assertEqual(response.status_code, 403)

        self.experiment2.public_access = Experiment.PUBLIC_ACCESS_FULL
        self.experiment2.save()
        # check view of file2 again
        response = client.get('/datafile/view/%i/' % self.dataset_file2.id)
        self.assertEqual(response.status_code, 200)

        # The following behaviour relies on ImageMagick
        if IMAGEMAGICK_AVAILABLE:
            # file2 should have a ".png" filename
            self.assertEqual(response['Content-Disposition'],
                             'inline; filename="%s"'
                             % (self.dataset_file2.filename + '.png'))
            # file2 should be a PNG
            self.assertEqual(response['Content-Type'], 'image/png')
            png_signature = "\x89PNG\r\n\x1a\n"
            self.assertEqual(response.content[0:8], png_signature)
        else:
            # file2 should have a ".tiff" filename
            self.assertEqual(response['Content-Disposition'],
                             'inline; filename="%s"'
                             % (self.dataset_file2.filename))
            # file2 should be a TIFF
            self.assertEqual(response['Content-Type'], 'image/tiff')
            tiff_signature = "II\x2a\x00"
            self.assertEqual(response.content[0:4], tiff_signature)

    def _check_tar_file(self, content, rootdir, datafiles):
        with NamedTemporaryFile('w') as tempfile:
            tempfile.write(content)
            tempfile.flush()
            with open(tempfile.name, 'r') as zipread:
                # It should be a zip file (all of which start with "PK")
                expect(zipread.read(2)).to_equal('PK')
            expect(is_zipfile(tempfile.name)).to_be_truthy()
            with ZipFile(tempfile.name, 'r') as zf:
                expect(len(zf.namelist())).to_equal(len(datafiles))
                for df in datafiles:
                    filename = join(rootdir, str(df.dataset.id),
                                    df.filename)
                    expect(filename in zf.namelist()).to_be_truthy()

    def _check_zip_file(self, content, rootdir, datafiles):
        with NamedTemporaryFile('w') as tempfile:
            tempfile.write(content)
            tempfile.flush()
            with open(tempfile.name, 'r') as zipread:
                # It should be a zip file (all of which start with "PK")
                expect(zipread.read(2)).to_equal('PK')
            expect(is_zipfile(tempfile.name)).to_be_truthy()
            zf = ZipFile(tempfile.name, 'r')
            expect(len(zf.namelist())).to_equal(len(datafiles))
            for df in datafiles:
                filename = join(rootdir, str(df.dataset.id), df.filename)
                expect(filename in zf.namelist()).to_be_truthy()
            zf.close()

    def testDownload(self):
        client = Client()

        # check download for experiment1
        response = client.get('/download/experiment/%i/zip/'
                              % self.experiment1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="experiment%s-complete.zip"'
                         % self.experiment1.id)
        self.assertEqual(response.status_code, 200)
        self._check_zip_file(
            response.content, str(self.experiment1.id),
            reduce(lambda x, y: x + y,
                   [ds.dataset_file_set.all()
                    for ds in self.experiment1.datasets.all()]))

        # check download of file1
        response = client.get('/download/datafile/%i/'
                              % self.dataset_file1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # requesting file2 should be forbidden...
        response = client.get('/download/datafile/%i/'
                              % self.dataset_file2.id)
        self.assertEqual(response.status_code, 403)

        # check dataset1 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [self.dataset1.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 200)
        self._check_zip_file(response.content, 'datasets',
                             self.dataset1.dataset_file_set.all())

        # check dataset2 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [self.dataset2.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 403)

        # check datafile1 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [],
                                'datafile': [self.dataset_file1.id]})
        self.assertEqual(response.status_code, 200)
        self._check_zip_file(response.content, 'datasets',
                             [self.dataset_file1])

        # check datafile2 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [],
                                'datafile': [self.dataset_file2.id]})
        self.assertEqual(response.status_code, 403)

        # Check datafile2 download with second experiment set to
        # "metadata only"
        self.experiment2.public_access = Experiment.PUBLIC_ACCESS_METADATA
        self.experiment2.save()
        response = client.get('/download/datafile/%i/'
                              % self.dataset_file2.id)
        # Metadata-only means "no file access"!
        self.assertEqual(response.status_code, 403)

        # Check datafile2 download with second experiment set to public
        self.experiment2.public_access = Experiment.PUBLIC_ACCESS_FULL
        self.experiment2.save()
        response = client.get('/download/datafile/%i/'
                              % self.dataset_file2.id)
        self.assertEqual(response.status_code, 200)
        # This should be a TIFF (which often starts with "II\x2a\x00")
        self.assertEqual(response['Content-Type'], 'image/tiff')
        self.assertEqual(response.content[0:4], "II\x2a\x00")

    def testDatasetFile(self):
        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a JPG file
        filename = abspath(join(dirname(__file__),
                                '../static/images/ands-logo-hi-res.jpg'))
        dataset = Dataset.objects.get(pk=self.dataset1.id)
        size, sha512sum = get_size_and_sha512sum(filename)
        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            size=str(size),
                            sha512sum=sha512sum,
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.verify()
        pdf1.save()

        try:
            from magic import Magic
            self.assertEqual(pdf1.mimetype, 'image/jpeg')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
        self.assertEqual(pdf1.size, str(14232))
        self.assertEqual(pdf1.md5sum, 'c450d5126ffe3d14643815204daf1bfb')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(
            dataset=dataset,
            filename=basename(filename),
            url='file://%s' % filename,
            protocol='file',
            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
            size=str(0),
            # Empty string always has the same hash
            sha512sum='cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e')
        pdf2.save()
        try:
            from magic import Magic
            self.assertEqual(pdf2.mimetype,
                             'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, '')

        pdf2.mimetype = ''
        pdf2.save()
        try:
            from magic import Magic
            self.assertEqual(pdf2.mimetype, 'application/pdf')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
class DownloadTestCase(TestCase):

    def setUp(self):
        # create a test user
        self.user = User.objects.create_user(username='******',
                                             email='',
                                             password='******')

        # create a public experiment
        self.experiment1 = Experiment(title='Experiment 1',
                                      created_by=self.user,
                                      public=True)
        self.experiment1.save()

        # create a non-public experiment
        self.experiment2 = Experiment(title='Experiment 2',
                                      created_by=self.user,
                                      public=False)
        self.experiment2.save()

        # dataset1 belongs to experiment1
        self.dataset1 = Dataset(experiment=self.experiment1)
        self.dataset1.save()

        # dataset2 belongs to experiment2
        self.dataset2 = Dataset(experiment=self.experiment2)
        self.dataset2.save()

        # absolute path first
        filename = 'testfile.txt'
        self.dest1 = abspath(join(settings.FILE_STORE_PATH,
                                  '%s/%s/' % (self.experiment1.id,
                                              self.dataset1.id)))
        self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                                  '%s/%s/' % (self.experiment2.id,
                                              self.dataset2.id)))
        if not exists(self.dest1):
            makedirs(self.dest1)
        if not exists(self.dest2):
            makedirs(self.dest2)

        testfile1 = abspath(join(self.dest1, filename))
        f = open(testfile1, 'w')
        f.write("Hello World!\n")
        f.close()

        testfile2 = abspath(join(self.dest2, filename))
        f = open(testfile2, 'w')
        f.write("Hello World!\n")
        f.close()

        self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                          filename=filename,
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file1.save()
        self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                          filename=basename(filename),
                                          protocol='tardis',
                                          url='tardis://%s' % filename)
        self.dataset_file2.save()

    def tearDown(self):
        self.user.delete()
        self.experiment1.delete()
        self.experiment2.delete()
        rmtree(self.dest1)
        rmtree(self.dest2)

    def testDownload(self):
        client = Client()

        # check download for experiment1
        response = client.get('/download/experiment/%i/zip/'
                              % self.experiment1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="experiment%s-complete.zip"'
                         % self.experiment1.id)
        self.assertEqual(response.status_code, 200)

        # check download of file1
        response = client.get('/download/datafile/%i/'
                              % self.dataset_file1.id)
        self.assertEqual(response['Content-Disposition'],
                         'attachment; filename="%s"'
                         % self.dataset_file1.filename)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'Hello World!\n')

        # requesting file2 should be forbidden...
        response = client.get('/download/datafile/%i/'
                              % self.dataset_file2.id)
        self.assertEqual(response.status_code, 403)

        # check dataset1 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [self.dataset1.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 200)

        # check dataset2 download
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [self.dataset2.id],
                                'datafile': []})
        self.assertEqual(response.status_code, 403)

        # check datafile1 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment1.id,
                                'dataset': [],
                                'datafile': [self.dataset_file1.id]})
        self.assertEqual(response.status_code, 200)

        # check datafile2 download via POST
        response = client.post('/download/datafiles/',
                               {'expid': self.experiment2.id,
                                'dataset': [],
                                'datafile': [self.dataset_file2.id]})
        self.assertEqual(response.status_code, 403)

    def testDatasetFile(self):
        # check registered text file for physical file meta information
        df = Dataset_File.objects.get(pk=self.dataset_file1.id)

        try:
            from magic import Magic
            self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
        self.assertEqual(df.size, str(13))
        self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

        # now check a JPG file
        filename = join(abspath(dirname(__file__)),
                        '../static/images/ands-logo-hi-res.jpg')
        dataset = Dataset.objects.get(pk=self.dataset1.id)
        pdf1 = Dataset_File(dataset=dataset,
                            filename=basename(filename),
                            url='file://%s' % filename,
                            protocol='file')
        pdf1.save()
        try:
            from magic import Magic
            self.assertEqual(pdf1.mimetype, 'image/jpeg')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
        self.assertEqual(pdf1.size, str(14232))
        self.assertEqual(pdf1.md5sum, 'c450d5126ffe3d14643815204daf1bfb')

        # now check that we can override the physical file meta information
        pdf2 = Dataset_File(
            dataset=dataset,
            filename=basename(filename),
            url='file://%s' % filename,
            protocol='file',
            mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
            size=str(0),
            md5sum='md5sum')
        pdf2.save()
        try:
            from magic import Magic
            self.assertEqual(pdf2.mimetype,
                             'application/vnd.openxmlformats-officedocument.presentationml.presentation')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
        self.assertEqual(pdf2.size, str(0))
        self.assertEqual(pdf2.md5sum, 'md5sum')

        pdf2.mimetype = ''
        pdf2.save()
        try:
            from magic import Magic
            self.assertEqual(pdf2.mimetype, 'application/pdf')
        except:
            # XXX Test disabled because lib magic can't be loaded
            pass
def _make_data_file(dataset, filename, content):
    # TODO: create datasetfile
    f = mktemp()
    print "Inside make data file ", f
    open(f, "w+b").write(content)

    df = Dataset_File()
    df.dataset = dataset
    df.filename = filename
    df.url = 'file://' + f
    df.protocol = "staging"
    df.size = len(content)
    df.verify(allowEmptyChecksums=True)
    df.save()
    print "Df ---", df
def testDatasetFile(self):
    # check registered text file for physical file meta information
    df = Dataset_File.objects.get(pk=self.dataset_file1.id)

    try:
        from magic import Magic
        self.assertEqual(df.mimetype, 'text/plain; charset=us-ascii')
    except:
        # XXX Test disabled because lib magic can't be loaded
        pass
    self.assertEqual(df.size, str(13))
    self.assertEqual(df.md5sum, '8ddd8be4b179a529afa5f2ffae4b9858')

    # now check a pdf file
    filename = join(abspath(dirname(__file__)),
                    '../static/downloads/DatasetDepositionGuide.pdf')
    dataset = Dataset.objects.get(pk=self.dataset1.id)
    pdf1 = Dataset_File(dataset=dataset,
                        filename=basename(filename),
                        url='file://%s' % filename,
                        protocol='file')
    pdf1.save()
    try:
        from magic import Magic
        self.assertEqual(pdf1.mimetype, 'application/pdf')
    except:
        # XXX Test disabled because lib magic can't be loaded
        pass
    self.assertEqual(pdf1.size, str(1008475))
    self.assertEqual(pdf1.md5sum, '9192b3d3e0056412b1d21d3e33562eba')

    # now check that we can override the physical file meta information
    pdf2 = Dataset_File(
        dataset=dataset,
        filename=basename(filename),
        url='file://%s' % filename,
        protocol='file',
        mimetype='application/vnd.openxmlformats-officedocument.presentationml.presentation',
        size=str(0),
        md5sum='md5sum')
    pdf2.save()
    try:
        from magic import Magic
        self.assertEqual(pdf2.mimetype,
                         'application/vnd.openxmlformats-officedocument.presentationml.presentation')
    except:
        # XXX Test disabled because lib magic can't be loaded
        pass
    self.assertEqual(pdf2.size, str(0))
    self.assertEqual(pdf2.md5sum, 'md5sum')

    pdf2.mimetype = ''
    pdf2.save()
    try:
        from magic import Magic
        self.assertEqual(pdf2.mimetype, 'application/pdf')
    except:
        # XXX Test disabled because lib magic can't be loaded
        pass
class ContextualViewTest(TestCase):

    def setUp(self):
        """setting up essential objects, copied from tests above"""
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user).save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()

        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View",
                                 type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()

        self.dfps = DatafileParameterSet(dataset_file=self.dataset_file,
                                         schema=self.testschema)
        self.dfps.save()

    def tearDown(self):
        self.user.delete()
        self.exp.delete()
        self.dataset.delete()
        self.dataset_file.delete()
        self.testschema.delete()
        self.dfps.delete()
        self.acl.delete()

    def testDetailsDisplay(self):
        """test display of view for an existing schema and no display
        for an undefined one.
        """
        from tardis.tardis_portal.views import display_datafile_details
        request = flexmock(user=self.user,
                           groups=[("testgroup", flexmock())])
        with self.settings(DATAFILE_VIEWS=[
                ("http://test.com/test/schema", "/test/url"),
                ("http://does.not.exist", "/false/url")]):
            response = display_datafile_details(
                request, dataset_file_id=self.dataset_file.id)
            self.assertEqual(response.status_code, 200)
            self.assertTrue("/ajax/parameters/" in response.content)
            self.assertTrue("/test/url" in response.content)
            self.assertFalse("/false/url" in response.content)
def addfiles(request):
    import os
    from os import path
    from os.path import basename
    from tardis.tardis_portal.models import Dataset_File
    import itertools
    from tardis.hpctardis.metadata import process_all_experiments
    from tardis.hpctardis.metadata import process_experimentX

    if 'username' in request.POST and \
            'password' in request.POST:
        authMethod = request.POST['authMethod']
        user = auth_service.authenticate(authMethod=authMethod,
                                         request=request)
        if user:
            eid = request.POST['eid']
            desc = request.POST['desc']
            folder = request.POST['folder']
            eid = int(eid)
            # TODO Use the try and except
            auth_key = settings.DEFAULT_AUTH
            try:
                exp = Experiment.objects.get(pk=eid)
                author = exp.created_by
            except Experiment.DoesNotExist:
                logger.exception(
                    'Experiment for eid %i in addfiles does not exist'
                    % eid)
                return HttpResponse("Experiment Not Found")

            current_user = str(user)
            created_user = str(author)
            if current_user == created_user:
                staging = path.join(settings.STAGING_PATH,
                                    str(user), str(eid), str(folder))
                filelist = []
                ds_desc = {}
                # import pdb
                # pdb.set_trace()
                for root, dirs, files in os.walk(staging):
                    for named in files:
                        filelist.append(named)
                next = str(filelist)
                ds_desc[desc] = filelist
                # TODO If needed for security - Metadata from the folder
                # can be extracted to check the folder name
                for d, df in ds_desc.items():
                    dataset = models.Dataset(description=d,
                                             experiment=exp)
                    dataset.save()
                    for f in df:
                        logger.debug('f = %s' % f)
                        filepath = path.join(staging, f)
                        size = path.getsize(filepath)
                        filename = path.basename(filepath)
                        datafile = Dataset_File(dataset=dataset,
                                                filename=filename,
                                                url=filepath,
                                                size=size,
                                                protocol='staging')
                        datafile.save()
                next = next + ' File path :' + staging
                process_experimentX(exp)
                next = next + ' The Author is : ' + str(author) + ',' + \
                    ' The User is : ' + str(user)
                return HttpResponse(next)
            else:
                next = 'The author of the experiment can only add the files (From Tardis)'
                return HttpResponse(next)
        else:
            return HttpResponse("UnSuccessful")
def setUp(self):
    # create a test user
    self.user = User.objects.create_user(username='******',
                                         email='',
                                         password='******')

    # create a public experiment
    self.experiment1 = Experiment(title='Experiment 1',
                                  created_by=self.user,
                                  public_access=Experiment.PUBLIC_ACCESS_FULL)
    self.experiment1.save()

    # create a non-public experiment
    self.experiment2 = Experiment(title='Experiment 2',
                                  created_by=self.user,
                                  public_access=Experiment.PUBLIC_ACCESS_NONE)
    self.experiment2.save()

    # dataset1 belongs to experiment1
    self.dataset1 = Dataset()
    self.dataset1.save()
    self.dataset1.experiments.add(self.experiment1)
    self.dataset1.save()

    # dataset2 belongs to experiment2
    self.dataset2 = Dataset()
    self.dataset2.save()
    self.dataset2.experiments.add(self.experiment2)
    self.dataset2.save()

    # absolute path first
    filename1 = 'testfile.txt'
    filename2 = 'testfile.tiff'
    self.dest1 = abspath(join(settings.FILE_STORE_PATH,
                              '%s/%s/' % (self.experiment1.id,
                                          self.dataset1.id)))
    self.dest2 = abspath(join(settings.FILE_STORE_PATH,
                              '%s/%s/' % (self.experiment2.id,
                                          self.dataset2.id)))
    if not exists(self.dest1):
        makedirs(self.dest1)
    if not exists(self.dest2):
        makedirs(self.dest2)

    testfile1 = abspath(join(self.dest1, filename1))
    f = open(testfile1, 'w')
    f.write("Hello World!\n")
    f.close()

    testfile2 = abspath(join(self.dest2, filename2))
    _generate_test_image(testfile2)

    size, sha512sum = get_size_and_sha512sum(testfile1)
    self.dataset_file1 = Dataset_File(dataset=self.dataset1,
                                      filename=filename1,
                                      protocol='',
                                      size=size,
                                      sha512sum=sha512sum,
                                      url='%d/%d/%s' % (self.experiment1.id,
                                                        self.dataset1.id,
                                                        filename1))
    self.dataset_file1.verify()
    self.dataset_file1.save()

    size, sha512sum = get_size_and_sha512sum(testfile2)
    self.dataset_file2 = Dataset_File(dataset=self.dataset2,
                                      filename=basename(filename2),
                                      protocol='',
                                      size=size,
                                      sha512sum=sha512sum,
                                      url='%d/%d/%s' % (self.experiment2.id,
                                                        self.dataset2.id,
                                                        filename2))
    self.dataset_file2.verify()
    self.dataset_file2.save()
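# _generate_test_image() is called in the setUp above but not defined in
# this excerpt. A plausible sketch using Pillow/PIL; the real helper may
# differ, this is an assumption:
try:
    from PIL import Image
except ImportError:
    import Image  # older PIL package layout

def _generate_test_image(filepath):
    # Write a tiny solid-colour RGB TIFF so the datafile has real,
    # verifiable bytes on disk.
    Image.new('RGB', (32, 32), color=(255, 0, 0)).save(filepath, 'TIFF')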
def generate_datafile(path, dataset, content=None, size=-1,
                      verify=True, verified=True,
                      verify_checksums_req=False):
    '''Generates a datafile AND a replica to hold its contents'''
    from tardis.tardis_portal.models import Dataset_File, Replica, Location
    saved = settings.REQUIRE_DATAFILE_CHECKSUMS
    settings.REQUIRE_DATAFILE_CHECKSUMS = False
    try:
        datafile = Dataset_File()
        if content:
            datafile.size = str(len(content))
        else:
            datafile.size = str(size)
        # Normally we use any old string for the datafile path, but some
        # tests require the path to be the same as what 'staging' would use
        if path is None:
            datafile.dataset_id = dataset.id
            datafile.save()
            path = "%s/%s/%s" % (dataset.get_first_experiment().id,
                                 dataset.id, datafile.id)
        filepath = os.path.normpath(settings.FILE_STORE_PATH + '/' + path)
        if content:
            try:
                os.makedirs(os.path.dirname(filepath))
                os.remove(filepath)
            except OSError:
                pass
            gen_file = open(filepath, 'wb+')
            gen_file.write(content)
            gen_file.close()
        datafile.mimetype = "application/unspecified"
        datafile.filename = os.path.basename(filepath)
        datafile.dataset_id = dataset.id
        datafile.save()
        settings.REQUIRE_DATAFILE_CHECKSUMS = verify_checksums_req

        location = _infer_location(path)
        replica = Replica(datafile=datafile, url=path, protocol='',
                          location=location)
        if verify and content:
            if not replica.verify():
                raise RuntimeError('verify failed!?!')
        replica.save()
        replica.verified = verified
        replica.save(update_fields=['verified'])  # force no verification
        return (datafile, replica)
    finally:
        settings.REQUIRE_DATAFILE_CHECKSUMS = saved
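# Typical use of generate_datafile() in a test, based on the signature
# above (assumes a saved dataset that is already linked to an experiment):
datafile, replica = generate_datafile(None, dataset,
                                      content='Hello World!\n')
assert replica.verified  # verified against the bytes written above

# Or register a path without writing any content; no file is created and
# verification is skipped (the path shown is illustrative only):
datafile2, replica2 = generate_datafile('1/1/hello.txt', dataset,
                                        size=13, verify=False)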
def process_enclosure(self, dataset, enclosure):
    '''
    Examines one "enclosure" from an entry, representing a datafile.
    Determines whether to process it, and if so, starts the transfer.
    '''
    # TODO tjdett: This method needs a clean-up, as it's doing many more
    # things than was originally intended. It now contains more code about
    # deciding whether to process the enclosure than it does about actually
    # processing it. That decision, or the influencing factors, should be
    # refactored into separate methods. Python has built-in time deltas and
    # Django has time formatting functions, both of which would clean this
    # code up considerably.

    def _get_enclosure_url(enclosure):
        '''Optionally manipulate the datafile URL, eg:
        http://foo.edu/bar.txt -> file:////fooserver/bar.txt'''
        if IngestOptions.USE_LOCAL_TRANSFERS:
            return enclosure.href.replace(IngestOptions.URL_BASE_TO_REPLACE,
                                          IngestOptions.LOCAL_SOURCE_PATH)
        else:
            return enclosure.href

    filename = getattr(enclosure, 'title', basename(enclosure.href))
    # check if we were provided a full path, and hence a subdirectory
    # for the file
    if (IngestOptions.DATAFILE_DIRECTORY_DEPTH >= 1 and
            getattr(enclosure, "path", "") != "" and
            enclosure.path.split("/")[IngestOptions.DATAFILE_DIRECTORY_DEPTH:] != ""):
        filename = "/".join(
            enclosure.path.split("/")[IngestOptions.DATAFILE_DIRECTORY_DEPTH:])

    datafiles = dataset.dataset_file_set.filter(filename=filename)

    def fromunix1000(tstr):
        return datetime.datetime.utcfromtimestamp(float(tstr) / 1000)

    if datafiles.count() > 0:
        datafile = datafiles[0]
        from django.db.models import Max
        newest = datafiles.aggregate(
            Max('modification_time'))['modification_time__max']
        if not newest:  # XXX rethink this!
            # We have this file, it has no time/date, let's skip it.
            return

        def total_seconds(td):  # exists on datetime.timedelta in Python 2.7
            return (td.microseconds +
                    (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

        timediff = total_seconds(fromunix1000(enclosure.modified) - newest)

        if timediff == 0:
            return  # We have this file already, same time/date.
        elif timediff < 0:
            logging.getLogger(__name__).warn(
                "Skipping datafile. File to ingest '{0}' is {1} *older* "
                "than stored file. Are the system clocks correct?".format(
                    enclosure.href, self.human_time(-timediff)))
            return
        else:
            if not IngestOptions.ALLOW_UPDATING_DATAFILES:
                logging.getLogger(__name__).warn(
                    "Skipping datafile. ALLOW_UPDATING_DATAFILES is "
                    "disabled, and '{0}' is {1} newer than stored "
                    "file.".format(enclosure.href,
                                   self.human_time(timediff)))
                return
            logging.getLogger(__name__).info(
                "Ingesting updated datafile. File to ingest '{0}' is {1} "
                "newer than stored file. This will create an additional "
                "copy.".format(enclosure.href, self.human_time(timediff)))
            if IngestOptions.HIDE_REPLACED_DATAFILES:
                # Mark all older versions of file as hidden. (!)
                try:
                    from tardis.microtardis.models import Dataset_Hidden
                    Dataset_Hidden.objects.filter(
                        datafile__dataset=dataset).update(hidden=True)
                except ImportError:
                    logger.warn(
                        "The MicroTardis app must be installed in order to "
                        "use the HIDE_REPLACED_DATAFILES option. Existing "
                        "version of datafile {0} will not be "
                        "hidden.".format(datafile.filename))
    else:  # no local copy already.
        logging.getLogger(__name__).info(
            "Ingesting datafile: '{0}'".format(enclosure.href))

    # Create a record and start transferring.
    datafile = Dataset_File(dataset=dataset,
                            url=_get_enclosure_url(enclosure),
                            filename=filename,
                            created_time=fromunix1000(enclosure.created),
                            modification_time=fromunix1000(enclosure.modified))
    datafile.protocol = enclosure.href.partition('://')[0]
    datafile.mimetype = getattr(enclosure, "mime", datafile.mimetype)
    datafile.size = getattr(enclosure, "length", datafile.size)
    try:
        hash_string = enclosure.hash
        # Split on whitespace, then on ':', to get (algorithm, digest)
        # tuples to feed into a dict
        hashdict = dict(s.partition(':')[::2] for s in hash_string.split())
        # Set the SHA-512 sum
        datafile.sha512sum = hashdict['sha-512']
    except AttributeError:
        pass
    datafile.save()
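# The enclosure hash attribute parsed above is a whitespace-separated list
# of 'algorithm:digest' pairs. The partition/slice idiom in context
# (digest values below are illustrative only):
pairs = 'md5:aaa111 sha-512:bbb222'
hashdict = dict(s.partition(':')[::2] for s in pairs.split())
assert hashdict == {'md5': 'aaa111', 'sha-512': 'bbb222'}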
class ViewTemplateContextsTest(TestCase):

    def setUp(self):
        """ setting up essential objects, copied from tests above """
        Location.force_initialize()
        self.location = Location.get_location('local')
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()
        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()
        self.replica = Replica(datafile=self.dataset_file,
                               url="http://foo",
                               location=self.location,
                               verified=False)
        self.replica.save()

    def tearDown(self):
        self.user.delete()
        self.exp.delete()
        self.dataset.delete()
        self.dataset_file.delete()
        self.acl.delete()

    def testExperimentView(self):
        """ test some template context parameters for an experiment view """
        from tardis.tardis_portal.views import view_experiment
        from tardis.tardis_portal.shortcuts import render_response_index
        from django.http import HttpRequest
        import sys

        # Default behavior
        views_module = flexmock(sys.modules['tardis.tardis_portal.views'])
        request = HttpRequest()
        request.user = self.user
        request.groups = []
        context = {
            'organization': ['test', 'test2'],
            'default_organization': 'test',
            'default_format': 'tar',
            'protocol': [['tgz', '/download/experiment/1/tgz/'],
                         ['tar', '/download/experiment/1/tar/']]
        }
        views_module.should_call('render_response_index') \
            .with_args(_AnyMatcher(),
                       "tardis_portal/view_experiment.html",
                       _ContextMatcher(context))
        response = view_experiment(request, experiment_id=self.exp.id)
        self.assertEqual(response.status_code, 200)

        # Behavior with USER_AGENT_SENSING enabled and a request.user_agent
        saved_setting = getattr(settings, "USER_AGENT_SENSING", None)
        try:
            setattr(settings, "USER_AGENT_SENSING", True)
            request = HttpRequest()
            request.user = self.user
            request.groups = []
            mock_agent = _MiniMock(os=_MiniMock(family="Macintosh"))
            setattr(request, 'user_agent', mock_agent)
            context = {
                'organization': ['classic', 'test', 'test2'],
                'default_organization': 'classic',
                'default_format': 'tar',
                'protocol': [['tar', '/download/experiment/1/tar/']]
            }
            views_module.should_call('render_response_index') \
                .with_args(_AnyMatcher(),
                           "tardis_portal/view_experiment.html",
                           _ContextMatcher(context))
            response = view_experiment(request, experiment_id=self.exp.id)
            self.assertEqual(response.status_code, 200)
        finally:
            if saved_setting is not None:
                setattr(settings, "USER_AGENT_SENSING", saved_setting)
            else:
                delattr(settings, "USER_AGENT_SENSING")

    def testDatasetView(self):
        """ test some context parameters for a dataset view """
        from tardis.tardis_portal.views import view_dataset
        from tardis.tardis_portal.shortcuts import render_response_index
        from django.http import HttpRequest
        import sys

        views_module = flexmock(sys.modules['tardis.tardis_portal.views'])
        request = HttpRequest()
        request.user = self.user
        request.groups = []
        context = {'default_organization': 'test', 'default_format': 'tar'}
        views_module.should_call('render_response_index') \
            .with_args(_AnyMatcher(),
                       "tardis_portal/view_dataset.html",
                       _ContextMatcher(context))
        response = view_dataset(request, dataset_id=self.dataset.id)
        self.assertEqual(response.status_code, 200)

        # Behavior with USER_AGENT_SENSING enabled and a request.user_agent
        saved_setting = getattr(settings, "USER_AGENT_SENSING", None)
        try:
            setattr(settings, "USER_AGENT_SENSING", True)
            request = HttpRequest()
            request.user = self.user
            request.groups = []
            mock_agent = _MiniMock(os=_MiniMock(family="Macintosh"))
            setattr(request, 'user_agent', mock_agent)
            context = {
                'default_organization': 'classic',
                'default_format': 'tar'
            }
            views_module.should_call('render_response_index') \
                .with_args(_AnyMatcher(),
                           "tardis_portal/view_dataset.html",
                           _ContextMatcher(context))
            response = view_dataset(request, dataset_id=self.dataset.id)
            self.assertEqual(response.status_code, 200)
        finally:
            if saved_setting is not None:
                setattr(settings, "USER_AGENT_SENSING", saved_setting)
            else:
                delattr(settings, "USER_AGENT_SENSING")
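# The helpers _AnyMatcher, _ContextMatcher and _MiniMock used above are not
# defined in this excerpt. Plausible minimal definitions, relying on
# flexmock comparing expected arguments via __eq__ (these are assumptions,
# not the project's actual implementations):
class _AnyMatcher(object):
    def __eq__(self, other):
        return True  # matches any argument


class _ContextMatcher(object):
    def __init__(self, template):
        self.template = template

    def __eq__(self, other):
        # Match if every expected key/value pair appears in the rendered
        # context (assumed to support dict-style lookup).
        for key, value in self.template.items():
            if key not in other or other[key] != value:
                return False
        return True


class _MiniMock(object):
    def __init__(self, **kwargs):
        # e.g. _MiniMock(os=_MiniMock(family="Macintosh")) gives an object
        # with a nested .os.family attribute, mimicking a user agent.
        self.__dict__.update(kwargs)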