def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.

    Numeric parameters are stored in ``numerical_value`` and string
    parameters in ``string_value``.  List-valued metadata is saved one
    parameter per non-empty line, skipping known Bioformats reader
    progress lines.

    :param instance: the Dataset_Files (datafile) instance the metadata
        belongs to
    :param schema: the Schema describing the parameter names
    :param metadata: dict mapping parameter names to values
    :return: the DatafileParameterSet (an existing one if already
        present), or None when no parameters matched the schema
    """
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None

    # Bioformats reader progress/noise output that must not be stored
    # as metadata values.  (Was a dict mapping to None; a set expresses
    # membership-only intent.)
    exclude_lines = {
        '-----',
        'Reading global metadata',
        'Reading metadata',
        'Reading core metadata',
        'Populating metadata',
        'Reading tags',
        'Verifying Gatan format',
        'Initializing reader',
        'Checking file format [Gatan Digital Micrograph]',
    }

    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name not in metadata:
            continue
        value = metadata[p.name]
        if p.isNumeric():
            # Empty strings cannot be stored in a numeric field.
            if value != '':
                dfp = DatafileParameter(parameterset=ps, name=p)
                dfp.numerical_value = value
                dfp.save()
        elif isinstance(value, list):
            # One parameter per meaningful line, preserving the
            # original reversed-iteration order.
            for val in reversed(value):
                strip_val = val.strip()
                if strip_val and strip_val not in exclude_lines:
                    dfp = DatafileParameter(parameterset=ps, name=p)
                    dfp.string_value = strip_val
                    dfp.save()
        else:
            dfp = DatafileParameter(parameterset=ps, name=p)
            dfp.string_value = value
            dfp.save()
    return ps
def saveExifMetadata(self, instance, schema, metadata):
    """Save all the EXIF metadata to a Dataset_Files parameter set.

    :param instance: the Dataset_Files (datafile) instance the metadata
        belongs to
    :param schema: the Schema describing the parameter names
    :param metadata: dict mapping parameter names to values
    :return: the DatafileParameterSet (an existing one if already
        present), or None when no parameters matched the schema
    """
    # NOTE(review): the original called self.getParamaters (typo); the
    # sibling save* methods all call getParameters.
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None

    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                dfp.numerical_value = metadata[p.name]
            else:
                dfp.string_value = metadata[p.name]
            dfp.save()
    return ps
def test_000_update_df_status_offline(self, mock_stat):
    """update_df_status should check the online status of preferred DFOs
    for all previously online datafiles and update online Parameter to
    'False' for any offline files."""
    df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
    df1.save()
    dfo1 = DataFileObject(datafile=df1,
                          storage_box=self.sbox1,
                          uri="stream/test.jpg",
                          verified=True)
    dfo1.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps = DatafileParameterSet(schema=schema, datafile=df1)
    ps.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param = DatafileParameter(parameterset=ps, name=param_name)
    # string_value is a text field — store the string form explicitly
    # (the original assigned the bool True).
    param.string_value = "True"
    param.save()
    # st_blocks=0 simulates a file whose blocks are not on disk, i.e.
    # an offline (HSM-migrated) file.
    mock_stat.return_value = Stats(st_size=10000, st_blocks=0,
                                   st_mtime=datetime.now())
    update_df_status()
    params = DatafileParameter.objects.filter(
        parameterset__schema=schema, parameterset__datafile=df1)
    # assertEqual: assertEquals is a deprecated alias.
    self.assertEqual(params.count(), 1)
    self.assertEqual(params[0].string_value, "False")
def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
    """update_df_status should skip any files that have previously been
    marked as offline."""
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2,
                          storage_box=self.sbox1,
                          uri="stream/test_df2.jpg",
                          verified=True)
    dfo2.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    # string_value is a text field — store the string form explicitly
    # (the original assigned the bool False).
    param2.string_value = "False"
    param2.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    update_df_status()
    # Assert that the df_online method wasn't called: already-offline
    # files must not be re-checked.
    self.assertEqual(mock_df_online.call_count, 0)
def saveFlexstationMetadata(self, instance, schema, metadata):
    """Saves or overwrites the datafile's metadata to a Dataset_Files
    parameter set in the database.

    Returns the existing DatafileParameterSet when one is already
    present, a freshly populated one otherwise, or None when no
    parameters matched the schema.
    """
    logger.info('Saving Metadata')
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None

    try:
        # Re-use an existing parameter set rather than duplicating it.
        return DatafileParameterSet.objects.get(schema=schema,
                                                dataset_file=instance)
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for param_name in parameters:
        if param_name.name not in metadata:
            continue
        value = metadata[param_name.name]
        dfp = DatafileParameter(parameterset=ps, name=param_name)
        if param_name.isNumeric():
            # Empty strings cannot be stored numerically — skip them.
            if value != '':
                dfp.numerical_value = value
                dfp.save()
        else:
            # Values arrive as cp1252-encoded byte strings.
            dfp.string_value = value.decode('cp1252')
            dfp.save()
    return ps
def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.

    Numeric parameters are stored in ``numerical_value`` and string
    parameters in ``string_value``.  List-valued metadata is saved one
    parameter per non-empty line.

    :param instance: the Dataset_Files (datafile) instance the metadata
        belongs to
    :param schema: the Schema describing the parameter names
    :param metadata: dict mapping parameter names to values
    :return: the DatafileParameterSet (an existing one if already
        present), or None when no parameters matched the schema
    """
    # Trace message — was logged at error level by mistake.
    logger.debug('Olympus-saveMetadata()')
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None

    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name not in metadata:
            continue
        value = metadata[p.name]
        if p.isNumeric():
            # Empty strings cannot be stored in a numeric field.
            if value != '':
                dfp = DatafileParameter(parameterset=ps, name=p)
                dfp.numerical_value = value
                dfp.save()
        elif isinstance(value, list):
            # BUGFIX: the original tested membership in `exclude_line`,
            # which was never defined in this function and raised
            # NameError for any non-empty line; only skip blank lines.
            for val in reversed(value):
                strip_val = val.strip()
                if strip_val:
                    dfp = DatafileParameter(parameterset=ps, name=p)
                    dfp.string_value = strip_val
                    dfp.save()
        else:
            dfp = DatafileParameter(parameterset=ps, name=p)
            dfp.string_value = value
            dfp.save()
    return ps
def savep(paramk, paramv):
    """Persist one datafile parameter named *paramk* with value *paramv*.

    Closure over ``schema`` and ``param_set`` from the enclosing scope.
    Empty values are silently skipped.
    """
    param_name = ParameterName.objects.get(schema__id=schema.id,
                                           name=paramk)
    if paramv == "":
        return  # nothing to store for an empty value
    dfp = DatafileParameter(parameterset=param_set, name=param_name)
    if param_name.isNumeric():
        dfp.numerical_value = paramv
    else:
        dfp.string_value = paramv
    dfp.save()
def test_link_parameter_type_extra(self):
    """LINK parameters resolve URL variants (no trailing slash, API
    URLs) to the right link_id/link_ct/link_gfk targets."""
    # Make a second ParameterSet for testing some variations in URL
    # values.
    self.datafileparameterset2 = DatafileParameterSet(
        schema=self.schema, datafile=self.datafile)
    self.datafileparameterset2.save()
    psm = ParameterSetManager(parameterset=self.datafileparameterset2)

    self.dataset_link_param2 = DatafileParameter(
        parameterset=self.datafileparameterset2,
        name=self.parametername_dataset_link)
    # /dataset/1 - no trailing slash
    dataset_url = self.dataset.get_absolute_url()
    self.dataset_link_param2.set_value(dataset_url)
    self.dataset_link_param2.save()

    # Check link_id/link_ct/link_gfk to dataset.
    # assertEqual instead of assertTrue(a == b): failures report both
    # values instead of just "False is not true".
    self.assertEqual(
        psm.get_param("dataset_link").link_id, self.dataset.id)
    dataset_ct = ContentType.objects.get(model__iexact="dataset")
    self.assertEqual(psm.get_param("dataset_link").link_ct, dataset_ct)
    self.assertEqual(psm.get_param("dataset_link").link_gfk, self.dataset)

    # Test links of the form /api/v1/experiment/<experiment_id>/
    self.exp_link_param2 = DatafileParameter(
        parameterset=self.datafileparameterset2,
        name=self.parametername_exp_link)
    exp_url = '/api/v1/experiment/%s/' % self.exp.id
    self.exp_link_param2.set_value(exp_url)
    self.exp_link_param2.save()

    # Check link_id/link_ct/link_gfk to experiment
    self.assertEqual(psm.get_param("exp_link").link_id, self.exp.id)
    exp_ct = ContentType.objects.get(model__iexact="experiment")
    self.assertEqual(psm.get_param("exp_link").link_ct, exp_ct)
    self.assertEqual(psm.get_param("exp_link").link_gfk, self.exp)
def create_df_status(datafile, schema_name, min_file_size):
    """Post-save celery task that checks online status of new file and
    create HSM metadata to track online status.

    Parameters
    ----------
    datafile: DataFile
        datafile to check and create online/offline status metadata for
    schema_name: Schema
        name of Schema which describes ParameterNames
    min_file_size : int
        minimum size of files to check HSM status of. This param is
        simply passed on to df_online.

    Returns
    -------
    None
    """
    if not datafile.verified:
        # BUGFIX: the original message contained stray embedded quotes
        # ('... %s " "is not verified') left over from a broken string
        # concatenation.
        LOGGER.warning(
            "Cannot determine online/offline status for datafile %s: "
            "it is not verified",
            datafile.id
        )
        return

    with DatafileLock(datafile, "datafile-%s" % datafile.id) as lock:
        if not lock:
            # Another worker holds the lock; it will create the status.
            return
        schema = Schema.objects.get(namespace=schema_name)
        if DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).exists():
            LOGGER.debug(
                "HSM DatafileParameterSet already exists for: %s",
                datafile.id
            )
            return
        ps = DatafileParameterSet(schema=schema, datafile=datafile)
        ps.save()

        param_name = ParameterName.objects.get(
            schema=schema,
            name="online"
        )
        dfp = DatafileParameter(parameterset=ps, name=param_name)
        # Stored as the strings "True"/"False".
        dfp.string_value = str(df_online(datafile, min_file_size))
        dfp.save()
def test_unresolvable_link_parameter(self):
    """
    Test that LINK Parameters that can't be resolved to a model
    (including non-URL values) still work.
    """
    # A third ParameterSet, dedicated to this test.
    self.datafileparameterset3 = DatafileParameterSet(
        schema=self.schema, datafile=self.datafile)
    self.datafileparameterset3.save()

    psm = ParameterSetManager(parameterset=self.datafileparameterset3)

    # A LINK Parameter pointing at an unresolvable (non-URL) free-text
    # value must raise SuspiciousOperation when set.
    self.freetext_link_param = DatafileParameter(
        parameterset=self.datafileparameterset3,
        name=self.parametername_unresolvable_link)
    with self.assertRaises(SuspiciousOperation):
        self.freetext_link_param.set_value("FREETEXT_ID_123")
def get_parse_status(squash_datafile, ns):
    """Return the 'parse_status' DatafileParameter for *squash_datafile*
    under the schema namespace *ns*, creating it (with string_value
    'new', and the ParameterName if needed) when it does not exist yet.
    """
    from tardis.tardis_portal.models import DatafileParameter
    try:
        return squash_datafile.datafileparameterset_set.get(
            schema__namespace=ns
        ).datafileparameter_set.get(name__name='parse_status')
    except DatafileParameter.DoesNotExist:
        # First time for this datafile: ensure the ParameterName exists
        # and create a fresh 'new' status parameter.
        from tardis.tardis_portal.models import Schema, ParameterName
        schema = Schema.objects.get(type=Schema.DATAFILE, namespace=ns)
        pn, _created = ParameterName.objects.get_or_create(
            name='parse_status',
            schema=schema,
            data_type=ParameterName.STRING)
        param_set = squash_datafile.datafileparameterset_set.get(
            schema=schema)
        status = DatafileParameter(parameterset=param_set, name=pn,
                                   string_value='new')
        status.save()
        return status
def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
    """update_df_status should skip files that are unverified"""
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    # Note: dfo2 is deliberately left unverified.
    dfo2 = DataFileObject(datafile=df2,
                          storage_box=self.sbox1,
                          uri="stream/test_df2.jpg")
    dfo2.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    # string_value is a text field — store the string form explicitly
    # (the original assigned the bool True).
    param2.string_value = "True"
    param2.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    update_df_status()
    # Unverified files must never reach the online check.
    df_online.assert_not_called()
def test_parameter(self):
    """FILENAME-typed parameters on datafile, dataset and experiment
    render as image links via Parameter.get()."""
    # Build a minimal experiment -> dataset -> datafile hierarchy.
    exp = Experiment(
        title='test exp1',
        institution_name='Australian Synchrotron',
        approved=True,
        created_by=self.user,
        public_access=Experiment.PUBLIC_ACCESS_NONE,
    )
    exp.save()

    dataset = Dataset(description="dataset description")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()

    df_file = DataFile(dataset=dataset,
                       filename='file.txt',
                       size=42,
                       md5sum='bogus')
    df_file.save()

    # One schema per model type.
    df_schema = Schema(namespace='http://www.cern.ch/felzmann/schema1.xml',
                       type=Schema.DATAFILE)
    df_schema.save()
    ds_schema = Schema(namespace='http://www.cern.ch/felzmann/schema2.xml',
                       type=Schema.DATASET)
    ds_schema.save()
    exp_schema = Schema(
        namespace='http://www.cern.ch/felzmann/schema3.xml',
        type=Schema.EXPERIMENT)
    exp_schema.save()

    # Matching FILENAME parameter names for each schema.
    df_parname = ParameterName(schema=df_schema,
                               name='name',
                               full_name='full_name',
                               units='image/jpg',
                               data_type=ParameterName.FILENAME)
    df_parname.save()
    ds_parname = ParameterName(schema=ds_schema,
                               name='name',
                               full_name='full_name',
                               units='image/jpg',
                               data_type=ParameterName.FILENAME)
    ds_parname.save()
    exp_parname = ParameterName(schema=exp_schema,
                                name='name',
                                full_name='full_name',
                                units='image/jpg',
                                data_type=ParameterName.FILENAME)
    exp_parname.save()

    df_parset = DatafileParameterSet(schema=df_schema, datafile=df_file)
    df_parset.save()
    ds_parset = DatasetParameterSet(schema=ds_schema, dataset=dataset)
    ds_parset.save()
    exp_parset = ExperimentParameterSet(schema=exp_schema, experiment=exp)
    exp_parset.save()

    with self.settings(METADATA_STORE_PATH=os.path.dirname(__file__)):
        filename = 'test.jpg'
        df_parameter = DatafileParameter(name=df_parname,
                                         parameterset=df_parset,
                                         string_value=filename)
        df_parameter.save()
        ds_parameter = DatasetParameter(name=ds_parname,
                                        parameterset=ds_parset,
                                        string_value=filename)
        ds_parameter.save()
        exp_parameter = ExperimentParameter(name=exp_parname,
                                            parameterset=exp_parset,
                                            string_value=filename)
        exp_parameter.save()

        # Each parameter renders an <a>-wrapped <img> pointing at the
        # corresponding display endpoint.
        self.assertEqual(
            "<a href='/display/DatafileImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatafileImage/load/%i/' /></a>" %  # noqa
            (df_parameter.id, df_parameter.id),
            df_parameter.get())
        self.assertEqual(
            "<a href='/display/DatasetImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatasetImage/load/%i/' /></a>" %  # noqa
            (ds_parameter.id, ds_parameter.id),
            ds_parameter.get())
        self.assertEqual(
            "<a href='/display/ExperimentImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/ExperimentImage/load/%i/' /></a>" %  # noqa
            (exp_parameter.id, exp_parameter.id),
            exp_parameter.get())
def setUp(self):
    """Create a user, an experiment/dataset/datafile chain, a schema
    with string/numeric/datetime/LINK parameter names, and a datafile
    parameter set populated with sample and LINK parameters."""
    from django.contrib.auth.models import User
    from tempfile import mkdtemp

    username = '******'
    password = 'secret'
    email_addr = ''
    self.user = User.objects.create_user(username, email_addr, password)

    self.test_dir = mkdtemp()

    # Experiment -> dataset -> datafile (+ one file object).
    self.exp = Experiment(title='test exp1',
                          institution_name='monash',
                          created_by=self.user)
    self.exp.save()

    self.dataset = Dataset(description="dataset description...")
    self.dataset.save()
    self.dataset.experiments.add(self.exp)
    self.dataset.save()

    self.datafile = DataFile(dataset=self.dataset,
                             filename="testfile.txt",
                             size="42",
                             md5sum='bogus')
    self.datafile.save()

    self.dfo = DataFileObject(
        datafile=self.datafile,
        storage_box=self.datafile.get_default_storage_box(),
        uri="1/testfile.txt")
    self.dfo.save()

    # Schema plus one parameter name per data type under test.
    self.schema = Schema(namespace="http://localhost/psmtest/df/",
                         name="Parameter Set Manager",
                         type=3)
    self.schema.save()

    self.parametername1 = ParameterName(schema=self.schema,
                                        name="parameter1",
                                        full_name="Parameter 1")
    self.parametername1.save()

    self.parametername2 = ParameterName(schema=self.schema,
                                        name="parameter2",
                                        full_name="Parameter 2",
                                        data_type=ParameterName.NUMERIC)
    self.parametername2.save()

    self.parametername3 = ParameterName(schema=self.schema,
                                        name="parameter3",
                                        full_name="Parameter 3",
                                        data_type=ParameterName.DATETIME)
    self.parametername3.save()

    self.datafileparameterset = DatafileParameterSet(
        schema=self.schema, datafile=self.datafile)
    self.datafileparameterset.save()

    self.datafileparameter1 = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername1, string_value="test1")
    self.datafileparameter1.save()

    self.datafileparameter2 = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername2, numerical_value=2)
    self.datafileparameter2.save()

    # Create a ParameterName and Parameter of type LINK to an experiment
    self.parametername_exp_link = ParameterName(
        schema=self.schema,
        name="exp_link",
        full_name="This parameter is a experiment LINK",
        data_type=ParameterName.LINK)
    self.parametername_exp_link.save()

    self.exp_link_param = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername_exp_link)
    exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
    self.exp_link_param.set_value(exp_url)
    self.exp_link_param.save()

    # Create a ParameterName and Parameter of type LINK to a dataset
    self.parametername_dataset_link = ParameterName(
        schema=self.schema,
        name="dataset_link",
        full_name="This parameter is a dataset LINK",
        data_type=ParameterName.LINK)
    self.parametername_dataset_link.save()

    self.dataset_link_param = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername_dataset_link)
    dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
    self.dataset_link_param.set_value(dataset_url)
    self.dataset_link_param.save()

    # Create a ParameterName type LINK to an unresolvable (non-URL)
    # free-text value
    self.parametername_unresolvable_link = ParameterName(
        schema=self.schema,
        name="freetext_link",
        full_name="This parameter is a non-URL LINK",
        data_type=ParameterName.LINK)
    self.parametername_unresolvable_link.save()