def test_datafile(self):
    """Dataset_File creation and download-URL generation.

    Covers a plain file (default protocol), a 'vbl'-protocol file, and a
    file with an absolute http URL.
    """
    from tardis.tardis_portal import models

    exp = models.Experiment(title='test exp1',
                            institution_name='monash',
                            approved=True,
                            created_by=self.user,
                            public=False)
    exp.save()

    dataset = models.Dataset(description="dataset description...",
                             experiment=exp)
    dataset.save()

    def check(datafile, filename, url, protocol, download_url):
        # One bundle of assertions per saved datafile.
        self.assertEqual(datafile.filename, filename)
        self.assertEqual(datafile.url, url)
        self.assertEqual(datafile.protocol, protocol)
        self.assertEqual(datafile.dataset, dataset)
        self.assertEqual(datafile.size, '')
        self.assertEqual(datafile.get_download_url(), download_url)

    datafile = models.Dataset_File(dataset=dataset,
                                   filename='file.txt',
                                   url='path/file.txt')
    datafile.save()
    check(datafile, 'file.txt', 'path/file.txt', '',
          '/test/download/datafile/1/')

    datafile = models.Dataset_File(dataset=dataset,
                                   filename='file1.txt',
                                   url='path/file1.txt',
                                   protocol='vbl')
    datafile.save()
    # A non-empty protocol is inserted into the download URL.
    check(datafile, 'file1.txt', 'path/file1.txt', 'vbl',
          '/test/vbl/download/datafile/2/')

    datafile = models.Dataset_File(
        dataset=dataset,
        filename='file1.txt',
        url='http://localhost:8080/filestore/file1.txt',
        protocol='')
    datafile.save()
    check(datafile, 'file1.txt',
          'http://localhost:8080/filestore/file1.txt', '',
          '/test/download/datafile/3/')
def _create_experiment(self, data=None):
    """Create and persist an Experiment from POST-style ``data``.

    Builds the experiment, its authors (comma-separated, order
    preserved), and two fixed datasets with datafiles.

    Returns the saved Experiment instance.

    Fixes: removed the unused ``forms`` import and the re-use of the
    name ``d`` for both the dataset description and the Dataset_File
    instance (variable shadowing).
    """
    from tardis.tardis_portal import models
    from os.path import basename
    from django.contrib.auth.models import User

    data = self._data_to_post(data)
    exp = models.Experiment(
        title=data['title'],
        institution_name=data['institution_name'],
        description=data['description'],
        created_by=User.objects.get(id=data['created_by']),
        )
    exp.save()

    # Authors are posted as one comma-separated string; the enumerate
    # index records their display order.
    for i, author in enumerate(data['authors'].split(', ')):
        ae = models.Author_Experiment(experiment=exp,
                                      author=author,
                                      order=i)
        ae.save()

    # Fixed fixture: two datasets, each with a known set of file paths.
    ds_desc = {'first one': ['file/another.py'],
               'second': ['second_ds/file.py', 'second_ds/file1.py']}
    for description, paths in ds_desc.items():
        dataset = models.Dataset(description=description,
                                 experiment=exp)
        dataset.save()
        for filepath in paths:
            datafile = models.Dataset_File(url='file://' + filepath,
                                           dataset=dataset,
                                           filename=basename(filepath))
            datafile.save()
    return exp
def test_save_metadata(self):
    """Running the EXIF filter on test.jpg stores 14 datafile parameters.

    Skipped when the EXIF filter (or its optional native dependencies)
    cannot be imported.

    Fixes: the original used a bare ``except:`` around the import, which
    also swallows SystemExit/KeyboardInterrupt; ``except Exception``
    keeps the skip-on-any-import-failure intent without that hazard.
    """
    from os import path
    try:
        from tardis.tardis_portal.filters.exif import EXIFFilter
    except Exception:
        raise SkipTest()
    from tardis.tardis_portal import models

    filename = path.join(path.abspath(path.dirname(__file__)), 'test.jpg')
    exp = models.Experiment(title='test exp1',
                            institution_name='monash',
                            approved=True,
                            created_by=self.user,
                            public=False)
    exp.save()
    dataset = models.Dataset(description="dataset description...",
                             experiment=exp)
    dataset.save()
    df_file = models.Dataset_File(dataset=dataset,
                                  filename='file1.txt',
                                  url=filename,
                                  protocol='')
    df_file.save()

    f = EXIFFilter("EXIF", "http://exif.schema")
    metadata = f.getExif(filename)
    # Return value unused, but the call is kept — presumably it creates
    # the ParameterName entries as a side effect; TODO confirm.
    f.getParamaters(f.getSchema(), metadata)
    ps = f.saveExifMetadata(df_file, f.getSchema(), metadata)
    self.assertEqual(len(ps.datafileparameter_set.all()), 14)
def setUp(self):
    """Fixture: a user, a staged file on disk, and a matching
    Experiment/Dataset/Dataset_File (stored on ``self.df``)."""
    from tardis.tardis_portal import models
    from tempfile import mkdtemp, mktemp
    from django.conf import settings
    from os import path
    import os

    # Disconnect post_save signal so saving the datafile below does not
    # trigger the staging hook.
    from django.db.models.signals import post_save
    from tardis.tardis_portal.models import staging_hook, Dataset_File
    post_save.disconnect(staging_hook, sender=Dataset_File)

    from django.contrib.auth.models import User
    self.user = User.objects.create_user('******', '', 'secret')

    try:
        os.makedirs(settings.GET_FULL_STAGING_PATH_TEST)
    except OSError:
        pass  # staging directory already exists

    self.temp = mkdtemp(dir=settings.GET_FULL_STAGING_PATH_TEST)
    self.file = mktemp(dir=self.temp)
    content = 'test file'
    with open(self.file, "w+b") as handle:
        handle.write(content)

    # make datafile
    exp = models.Experiment(title='test exp1',
                            institution_name='monash',
                            created_by=self.user)
    exp.save()

    # make dataset
    dataset = models.Dataset(description="dataset description...")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()

    # create datasetfile pointing at the staged copy
    datafile = models.Dataset_File()
    datafile.dataset = dataset
    datafile.filename = path.basename(self.file)
    datafile.url = 'file://' + self.file
    datafile.protocol = "staging"
    datafile.size = len(content)
    datafile.verify(allowEmptyChecksums=True)
    datafile.save()
    self.df = datafile
def setUp(self):
    """Fixture: a user, a staged file, and an
    Experiment/Dataset/Dataset_File with a verified staging Replica
    (stored on ``self.replica``)."""
    from tardis.tardis_portal import models
    from tempfile import mkdtemp, mktemp
    from django.conf import settings
    from os import path
    import os

    # Disconnect post_save signal so saving the replica below does not
    # trigger the staging hook.
    from django.db.models.signals import post_save
    from tardis.tardis_portal.models import \
        staging_hook, Dataset_File, Replica, Location
    post_save.disconnect(staging_hook, sender=Replica)

    from django.contrib.auth.models import User
    self.user = User.objects.create_user('******', '', 'secret')

    try:
        os.makedirs(settings.GET_FULL_STAGING_PATH_TEST)
    except OSError:
        pass  # staging directory already exists

    self.temp = mkdtemp(dir=settings.GET_FULL_STAGING_PATH_TEST)
    self.file = mktemp(dir=self.temp)
    content = 'test file'
    with open(self.file, "w+b") as handle:
        handle.write(content)

    Location.force_initialize()

    # make datafile
    exp = models.Experiment(title='test exp1',
                            institution_name='monash',
                            created_by=self.user)
    exp.save()

    # make dataset
    dataset = models.Dataset(description="dataset description...")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()

    # create datafile
    datafile = models.Dataset_File(
        dataset=dataset,
        size=len(content),
        filename=path.basename(self.file),
        md5sum='f20d9f2072bbeb6691c0f9c5099b01f3')
    datafile.save()

    # create replica pointing at the staged copy of the file
    base_url = 'file://' + settings.GET_FULL_STAGING_PATH_TEST
    location = Location.load_location({
        'name': 'staging-test-yyy',
        'url': base_url,
        'type': 'external',
        'priority': 10,
        'transfer_provider': 'local'})
    replica = models.Replica(datafile=datafile,
                             url='file://' + self.file,
                             protocol="staging",
                             location=location)
    replica.verify()
    replica.save()
    self.replica = replica
def test_parameter(self):
    """FILENAME-typed parameters on datafile, dataset and experiment
    render as image-link HTML; the stored files are removed afterwards.

    Fixes a resource leak: the original called
    ``b64encode(open(filename).read())`` three times, never closing any
    of the file handles. The image is now read once inside a ``with``.
    """
    from tardis.tardis_portal import models
    exp = models.Experiment(
        title='test exp1',
        institution_name='Australian Synchrotron',
        approved=True,
        created_by=self.user,
        public_access=models.Experiment.PUBLIC_ACCESS_NONE,
        )
    exp.save()
    dataset = models.Dataset(description="dataset description")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()
    df_file = models.Dataset_File(
        dataset=dataset,
        filename='file.txt',
        url='path/file.txt',
        )
    df_file.save()

    # One schema per model type.
    df_schema = models.Schema(
        namespace='http://www.cern.ch/felzmann/schema1.xml',
        type=models.Schema.DATAFILE)
    df_schema.save()
    ds_schema = models.Schema(
        namespace='http://www.cern.ch/felzmann/schema2.xml',
        type=models.Schema.DATASET)
    ds_schema.save()
    exp_schema = models.Schema(
        namespace='http://www.cern.ch/felzmann/schema3.xml',
        type=models.Schema.EXPERIMENT)
    exp_schema.save()

    df_parname = models.ParameterName(
        schema=df_schema,
        name='name',
        full_name='full_name',
        units='image/jpg',
        data_type=models.ParameterName.FILENAME)
    df_parname.save()
    ds_parname = models.ParameterName(
        schema=ds_schema,
        name='name',
        full_name='full_name',
        units='image/jpg',
        data_type=models.ParameterName.FILENAME)
    ds_parname.save()
    exp_parname = models.ParameterName(
        schema=exp_schema,
        name='name',
        full_name='full_name',
        units='image/jpg',
        data_type=models.ParameterName.FILENAME)
    exp_parname.save()

    df_parset = models.DatafileParameterSet(schema=df_schema,
                                            dataset_file=df_file)
    df_parset.save()
    ds_parset = models.DatasetParameterSet(schema=ds_schema,
                                           dataset=dataset)
    ds_parset.save()
    exp_parset = models.ExperimentParameterSet(schema=exp_schema,
                                               experiment=exp)
    exp_parset.save()

    from base64 import b64encode
    from os import path
    from os import remove
    filename = path.join(path.dirname(__file__), 'test.jpg')
    # Read the image once with the handle properly closed; all three
    # parameters use the same base64 payload.
    with open(filename) as image:
        encoded_image = b64encode(image.read())

    df_parameter = models.DatafileParameter(name=df_parname,
                                            parameterset=df_parset,
                                            string_value=encoded_image)
    df_parameter.save()
    ds_parameter = models.DatasetParameter(name=ds_parname,
                                           parameterset=ds_parset,
                                           string_value=encoded_image)
    ds_parameter.save()
    exp_parameter = models.ExperimentParameter(name=exp_parname,
                                               parameterset=exp_parset,
                                               string_value=encoded_image)
    exp_parameter.save()

    self.assertEqual(
        "<a href='/test/DatafileImage/load/%i/' target='_blank'><img src='/test/DatafileImage/load/%i/' /></a>" %
        (df_parameter.id, df_parameter.id),
        df_parameter.get())
    self.assertEqual(
        "<a href='/test/DatasetImage/load/%i/' target='_blank'><img src='/test/DatasetImage/load/%i/' /></a>" %
        (ds_parameter.id, ds_parameter.id),
        ds_parameter.get())
    self.assertEqual(
        "<a href='/test/ExperimentImage/load/%i/' target='_blank'><img src='/test/ExperimentImage/load/%i/' /></a>" %
        (exp_parameter.id, exp_parameter.id),
        exp_parameter.get())

    # Clean up the files written into the file store. After save(),
    # string_value presumably holds the stored file's name — verify
    # against the FILENAME parameter handling.
    remove(path.join(settings.FILE_STORE_PATH, df_parameter.string_value))
    remove(path.join(settings.FILE_STORE_PATH, ds_parameter.string_value))
    remove(path.join(settings.FILE_STORE_PATH, exp_parameter.string_value))
def endElementNS(self, name, qname):
    """SAX end-of-element handler for METS ingestion.

    ``name`` is a (namespace, localname) tuple; ``qname`` is unused.
    Drives a state machine whose flags were raised elsewhere in the
    handler: most branches only lower a "grab"/"in" flag, while closing
    'dmdSec' persists the pending experiment or dataset, and closing the
    xmlData child element persists the collected metadata parameters.
    """
    # just get the element name without the namespace
    elName = name[1]
    if elName == 'dmdSec':
        self.inDmdSec = False
        # if we currently processing an experiment structure, let's
        # save the institution value before we finalise the experiment
        if self.processExperimentStruct:
            self.metsObject.institution = self.institution
            # let's save the experiment in the DB
            if self.tardisExpId:
                # Re-ingesting into an existing experiment record.
                self.modelExperiment = models.Experiment.objects.get(
                    pk=self.tardisExpId)
            else:
                self.modelExperiment = models.Experiment()
                self.modelExperiment.id = self.tardisExpId
            self.modelExperiment.url = self.metsObject.url
            self.modelExperiment.approved = True
            self.modelExperiment.title = self.metsObject.title
            self.modelExperiment.institution_name = \
                self.metsObject.institution
            self.modelExperiment.description = self.metsObject.description
            self.modelExperiment.start_time = self.metsObject.start_time
            self.modelExperiment.end_time = self.metsObject.end_time
            self.modelExperiment.created_by = self.createdBy
            self.modelExperiment.save()
            self.holder.experimentDatabaseId = self.modelExperiment.id
            # Authors are stored in document order via the counter.
            x = 0
            for author in self.metsObject.authors:
                author_experiment = models.Author_Experiment(
                    experiment=self.modelExperiment,
                    author=author,
                    order=x)
                author_experiment.save()
                x = x + 1
        elif self.processDatasetStruct:
            # let's save the dataset in the DB
            self.modelDataset = models.Dataset(
                description=self.metsObject.title,
                immutable=settings.IMMUTABLE_METS_DATASETS)
            self.modelDataset.save()
            self.modelDataset.experiments.add(self.modelExperiment)
            self.modelDataset.save()
            # let's also save the modelDataset in a dictionary so that we
            # can look it up easily later on when we start processing
            # the datafiles.
            self.datasetLookupDict[self.metsObject.id] = self.modelDataset
        # Reset the per-dmdSec parsing state.
        self.metsObject = None
        self.processExperimentStruct = False
        self.processDatasetStruct = False
    elif elName == 'title' and self.inDmdSec:
        self.grabTitle = False
    elif elName == 'startTime' and self.processExperimentStruct:
        self.grabStartTime = False
    elif elName == 'endTime' and self.processExperimentStruct:
        self.grabEndTime = False
    elif elName == 'url' and self.processExperimentStruct:
        self.grabExperimentUrl = False
    elif elName == 'abstract' and self.processExperimentStruct:
        self.grabAbstract = False
    elif elName == 'name' and self.processExperimentStruct:
        self.inName = False
    elif elName == 'namePart' and self.inName:
        self.grabMightBeAuthor = False
    elif elName == 'roleTerm' and self.inName:
        self.grabRoleTerm = False
        self.mightBeAuthor = None
    elif elName == 'name' and self.inInstitution:
        self.grabInstitution = False
    elif elName == 'agent':
        self.inInstitution = False
    elif elName == 'amdSec':
        # we're done processing the metadata entries
        self.inAmdSec = False
        # let's reset the cached experiment model object
        self.modelExperiment = None
    elif elName == 'techMD' and self.inAmdSec:
        self.inTechMd = False
        self.metadataId = None
        self.metsObject = None
        self.processMetadata = False
    elif elName == 'xmlData' and self.inTechMd:
        self.inXmlData = False
    elif elName != self.xmlDataChildElement and \
            self.customHandler is not None:
        # Delegate unknown metadata elements to the custom handler.
        self.customHandler.endElement(elName)
    elif elName == self.xmlDataChildElement and self.inXmlData:
        if self.customHandler is not None:
            self.tempMetadataHolder = self.customHandler.metadataDict
        try:
            schema = models.Schema.objects.get(
                namespace__exact=self.elementNamespace)
            # get the associated parameter names for the given schema
            parameterNames = \
                models.ParameterName.objects.filter(
                    schema__namespace__exact=schema.namespace).order_by('id')
            # let's create a trigger holder which we can use to check
            # if we still need to create another parameterset entry in the
            # DB
            createParamSetFlag = {'experiment': True,
                                  'dataset': True,
                                  'datafile': True}
            datasetParameterSet = None
            datafileParameterSet = None
            if self.metadataId in self.holder.metadataMap:
                for metsObject in self.holder.metadataMap[self.metadataId]:
                    self.metsObject = metsObject
                    metsObjectClassName = self.metsObject.__class__.__name__
                    if metsObjectClassName == 'Experiment':
                        if createParamSetFlag['experiment']:
                            # create a new parameter set for the metadata
                            parameterSet = \
                                models.ExperimentParameterSet(
                                    schema=schema,
                                    experiment=self.modelExperiment)
                            parameterSet.save()
                            # now let's process the experiment parameters
                            for parameterName in parameterNames:
                                if parameterName.name in \
                                        self.tempMetadataHolder:
                                    parameterValues = self.tempMetadataHolder[
                                        parameterName.name]
                                    self._saveParameters('ExperimentParameter',
                                                         parameterName,
                                                         parameterValues,
                                                         parameterSet)
                            createParamSetFlag['experiment'] = False
                        else:
                            # this is not even allowed as there's only going
                            # to be one experiment per METS file
                            raise Exception('forbidden state!')
                    elif metsObjectClassName == 'Dataset':
                        if createParamSetFlag['dataset']:
                            dataset = self.datasetLookupDict[
                                self.metsObject.id]
                            # create a new parameter set for the
                            # dataset metadata
                            datasetParameterSet = \
                                models.DatasetParameterSet(schema=schema,
                                                           dataset=dataset)
                            datasetParameterSet.save()
                            # now let's process the dataset parameters
                            for parameterName in parameterNames:
                                if parameterName.name in \
                                        self.tempMetadataHolder:
                                    parameterValues = self.tempMetadataHolder[
                                        parameterName.name]
                                    self._saveParameters('DatasetParameter',
                                                         parameterName,
                                                         parameterValues,
                                                         datasetParameterSet)
                            # disable creation for the next visit
                            createParamSetFlag['dataset'] = False
                    elif metsObjectClassName == 'Datafile':
                        # this will be a good time to save the
                        # "hard" metadata of this datafile so that
                        # when we start adding "soft" metadata
                        # parameters to it, we already have an
                        # entry for it in the DB
                        # look up the dataset this file belongs to
                        thisFilesDataset = self.datasetLookupDict[
                            self.metsObject.dataset.id]
                        # also check if the file already exists
                        datafile = thisFilesDataset.dataset_file_set.filter(
                            filename=self.metsObject.name,
                            size=self.metsObject.size)
                        if datafile.count() == 0:
                            size = self.metsObject.size
                            if not self.metsObject.size:
                                size = 0

                            def checksum(obj, type_):
                                # Returns obj's checksum when it matches
                                # type_ and is a well-formed hex digest of
                                # the right length; '' otherwise.
                                # Check if the checksum is of type
                                if obj.checksumType != type_:
                                    return ''
                                checksum = obj.checksum.lower()
                                # Ensure the checksum is hexdecimal
                                if not re.match('[0-9a-f]+$', checksum):
                                    return ''
                                # Get algorithm
                                try:
                                    name = type_.replace('-','').lower()
                                    alg = getattr(hashlib, name)
                                except:
                                    return ''
                                # Check checksum is the correct length
                                hex_length = alg('').digest_size * 2
                                if hex_length != len(checksum):
                                    return ''
                                # Should be valid checksum of given type
                                return checksum
                            sync_url, proto = get_sync_url_and_protocol(
                                self.syncRootDir,
                                self.metsObject.url)
                            self.modelDatafile = models.Dataset_File(
                                dataset=thisFilesDataset,
                                filename=self.metsObject.name,
                                url=sync_url,
                                size=size,
                                md5sum=checksum(self.metsObject, 'MD5'),
                                sha512sum=checksum(self.metsObject,
                                                   'SHA-512'),
                                protocol=proto)
                            self.modelDatafile.save()
                        else:
                            self.modelDatafile = \
                                thisFilesDataset.dataset_file_set.get(
                                    filename=self.metsObject.name,
                                    size=self.metsObject.size)
                        # TODO: we need to note here that we are
                        # only creating a datafile entry in the DB
                        # for files that have corresponding
                        # metadata. if we are to create a file
                        # entry for files with no metadata, we'll
                        # need to get the unaccessed datafiles
                        # from datasetLookupDict.
                        if createParamSetFlag['datafile']:
                            # create a new parameter set for the metadata
                            datafileParameterSet = \
                                models.DatafileParameterSet(schema=schema,
                                    dataset_file=self.modelDatafile)
                            datafileParameterSet.save()
                            # now let's process the datafile parameters
                            for parameterName in parameterNames:
                                if parameterName.name in \
                                        self.tempMetadataHolder:
                                    parameterValues = self.tempMetadataHolder[
                                        parameterName.name]
                                    self._saveParameters('DatafileParameter',
                                                         parameterName,
                                                         parameterValues,
                                                         datafileParameterSet)
                            createParamSetFlag['datafile'] = False
        except models.Schema.DoesNotExist:
            # Unknown namespace: skip the metadata rather than fail the
            # whole ingest.
            logger.warning('unsupported schema being ingested ' +
                           self.elementNamespace)
        # reset the current xmlData child element so that if a new
        # parameter set is read, we can process it again
        self.xmlDataChildElement = None
        self.customHandler = None
    elif elName == self.parameterName and \
            self.xmlDataChildElement is not None:
        # reset self.parameterName to None so the next parameter can be
        # processed
        self.parameterName = None
def setUp(self):
    """Fixture: one experiment/dataset/datafile, plus a schema holding a
    string parameter and a numeric parameter on the datafile."""
    from django.contrib.auth.models import User
    from tempfile import mkdtemp

    self.user = User.objects.create_user('******', '', 'secret')
    self.test_dir = mkdtemp()

    self.exp = models.Experiment(title='test exp1',
                                 institution_name='monash',
                                 created_by=self.user)
    self.exp.save()

    self.dataset = models.Dataset(description="dataset description...",
                                  experiment=self.exp)
    self.dataset.save()

    self.datafile = models.Dataset_File(dataset=self.dataset,
                                        filename="testfile.txt",
                                        url="file://1/testfile.txt")
    self.datafile.save()

    self.schema = models.Schema(namespace="http://localhost/psmtest/df/",
                                name="Parameter Set Manager",
                                type=3)
    self.schema.save()

    # Default data type (string) for the first parameter name.
    self.parametername1 = models.ParameterName(schema=self.schema,
                                               name="parameter1",
                                               full_name="Parameter 1")
    self.parametername1.save()

    self.parametername2 = models.ParameterName(
        schema=self.schema,
        name="parameter2",
        full_name="Parameter 2",
        data_type=models.ParameterName.NUMERIC)
    self.parametername2.save()

    self.datafileparameterset = models.DatafileParameterSet(
        schema=self.schema, dataset_file=self.datafile)
    self.datafileparameterset.save()

    self.datafileparameter1 = models.DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername1,
        string_value="test1")
    self.datafileparameter1.save()

    self.datafileparameter2 = models.DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername2,
        numerical_value=2)
    self.datafileparameter2.save()
def test_parameter(self):
    """FILENAME-typed parameters on datafile, dataset and experiment
    render as image-link HTML when METADATA_STORE_PATH points at the
    test directory."""
    from tardis.tardis_portal import models
    exp = models.Experiment(
        title='test exp1',
        institution_name='Australian Synchrotron',
        approved=True,
        created_by=self.user,
        public_access=models.Experiment.PUBLIC_ACCESS_NONE)
    exp.save()

    dataset = models.Dataset(description="dataset description")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()

    df_file = models.Dataset_File(dataset=dataset,
                                  filename='file.txt',
                                  size='42',
                                  md5sum='bogus')
    df_file.save()

    df_schema = models.Schema(
        namespace='http://www.cern.ch/felzmann/schema1.xml',
        type=models.Schema.DATAFILE)
    df_schema.save()
    ds_schema = models.Schema(
        namespace='http://www.cern.ch/felzmann/schema2.xml',
        type=models.Schema.DATASET)
    ds_schema.save()
    exp_schema = models.Schema(
        namespace='http://www.cern.ch/felzmann/schema3.xml',
        type=models.Schema.EXPERIMENT)
    exp_schema.save()

    def make_parname(schema):
        # All three parameter names share the same FILENAME definition.
        parname = models.ParameterName(
            schema=schema,
            name='name',
            full_name='full_name',
            units='image/jpg',
            data_type=models.ParameterName.FILENAME)
        parname.save()
        return parname

    df_parname = make_parname(df_schema)
    ds_parname = make_parname(ds_schema)
    exp_parname = make_parname(exp_schema)

    df_parset = models.DatafileParameterSet(schema=df_schema,
                                            dataset_file=df_file)
    df_parset.save()
    ds_parset = models.DatasetParameterSet(schema=ds_schema,
                                           dataset=dataset)
    ds_parset.save()
    exp_parset = models.ExperimentParameterSet(schema=exp_schema,
                                               experiment=exp)
    exp_parset.save()

    from os import path
    with self.settings(METADATA_STORE_PATH=path.dirname(__file__)):
        filename = 'test.jpg'
        df_parameter = models.DatafileParameter(name=df_parname,
                                                parameterset=df_parset,
                                                string_value=filename)
        df_parameter.save()
        ds_parameter = models.DatasetParameter(name=ds_parname,
                                               parameterset=ds_parset,
                                               string_value=filename)
        ds_parameter.save()
        exp_parameter = models.ExperimentParameter(name=exp_parname,
                                                   parameterset=exp_parset,
                                                   string_value=filename)
        exp_parameter.save()

        self.assertEqual(
            "<a href='/test/DatafileImage/load/%i/' target='_blank'><img style='width: 300px;' src='/test/DatafileImage/load/%i/' /></a>" %  # noqa
            (df_parameter.id, df_parameter.id),
            df_parameter.get())
        self.assertEqual(
            "<a href='/test/DatasetImage/load/%i/' target='_blank'><img style='width: 300px;' src='/test/DatasetImage/load/%i/' /></a>" %  # noqa
            (ds_parameter.id, ds_parameter.id),
            ds_parameter.get())
        self.assertEqual(
            "<a href='/test/ExperimentImage/load/%i/' target='_blank'><img style='width: 300px;' src='/test/ExperimentImage/load/%i/' /></a>" %  # noqa
            (exp_parameter.id, exp_parameter.id),
            exp_parameter.get())