def saveSpectraMetadata(self, instance, schema, metadata):
    """Save spectra metadata to a Dataset_Files parameter set.

    Returns the existing DatafileParameterSet untouched if one is
    already present for (schema, instance); otherwise creates one and
    populates it from *metadata*. Each metadata value is a sequence;
    only its first element is stored.
    """
    # NOTE: 'getParamaters' (sic) matches the method name used elsewhere
    # in this class; keep the misspelling so the call resolves.
    parameters = self.getParamaters(schema, metadata)
    if not parameters:
        return None

    ps, created = DatafileParameterSet.objects.get_or_create(
        schema=schema, dataset_file=instance)
    if not created:
        # Parameter set already exists; don't duplicate its parameters.
        return ps
    # get_or_create() has already saved the new parameter set, so the
    # original's extra ps.save() was redundant and has been removed.

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                dfp.numerical_value = metadata[p.name][0]
            else:
                dfp.string_value = metadata[p.name][0]
            dfp.save()
    return ps
def saveExifMetadata(self, instance, schema, metadata):
    """Save EXIF metadata to a Dataset_Files parameter set.

    If a parameter set already exists for (schema, instance) it is
    returned as-is; otherwise a new one is created and filled from
    *metadata*.
    """
    parameters = self.getParamaters(schema, metadata)
    if not parameters:
        return None

    try:
        # Existing set means the metadata was stored previously.
        return DatafileParameterSet.objects.get(schema=schema,
                                                dataset_file=instance)
    except DatafileParameterSet.DoesNotExist:
        param_set = DatafileParameterSet(schema=schema,
                                         dataset_file=instance)
        param_set.save()

    for param in parameters:
        if param.name not in metadata:
            continue
        datafile_param = DatafileParameter(parameterset=param_set,
                                           name=param)
        if param.isNumeric():
            datafile_param.numerical_value = metadata[param.name]
        else:
            datafile_param.string_value = metadata[param.name]
        datafile_param.save()
    return param_set
def saveFlexstationMetadata(self, instance, schema, metadata):
    """Saves or overwrites the datafile's metadata to a Dataset_Files
    parameter set in the database.
    """
    logger.info('Saving Metadata')
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()
    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                # NOTE(review): a numeric parameter with an empty-string
                # value is silently skipped — no parameter row is created.
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            else:
                # Assumes the raw value is cp1252-encoded bytes (Windows
                # instrument output) — TODO confirm with the extractor.
                dfp.string_value = metadata[p.name].decode('cp1252')
                dfp.save()
    return ps
def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
    """update_df_status should skip any files that have previously been
    marked as offline."""
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2, storage_box=self.sbox1,
                          uri="stream/test_df2.jpg", verified=True)
    dfo2.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    # Store the string "False", not the boolean, matching how
    # create_df_status persists the flag (str(df_online(...))).
    param2.string_value = "False"
    param2.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())

    update_df_status()

    # The online check must not run for files already marked offline.
    self.assertEqual(mock_df_online.call_count, 0)
def saveDicomMetadata(self, instance, schema, metadata):
    """Save the DICOM metadata dump to a Dataset_Files parameter set.

    Returns the existing DatafileParameterSet if one already exists;
    otherwise creates one and stores *metadata* in the 'dump' parameter.
    Returns None on failure. NOTE(review): the success path after
    creating a new set also returns None (implicitly) — kept as-is in
    case callers rely on it; siblings return ps instead.
    """
    # FIXME re-enable parameter validation:
    # parameters = self.getParameters(schema, metadata)
    # if not parameters:
    #     return None
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # was the magic number 10
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()
    try:
        dfp = DatafileParameter(
            parameterset=ps,
            name=ParameterName.objects.get(name='dump'))
        dfp.string_value = metadata
        dfp.save()
    except Exception as e:
        # Was the Python-2-only "except Exception, e" spelling; the
        # string concatenation with non-str metadata could also crash,
        # so log lazily via %s instead.
        logger.debug('dicompng saveDicomMetadata failed: %s', e)
        return None
def test_000_update_df_status_offline(self, mock_stat):
    """update_df_status should check the online status of preferred DFOs
    for all previously online datafiles and update the online Parameter
    to 'False' for any offline files."""
    df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
    df1.save()
    dfo1 = DataFileObject(datafile=df1, storage_box=self.sbox1,
                          uri="stream/test.jpg", verified=True)
    dfo1.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps = DatafileParameterSet(schema=schema, datafile=df1)
    ps.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param = DatafileParameter(parameterset=ps, name=param_name)
    # Persist the string "True" (not the boolean), matching how
    # create_df_status stores the flag.
    param.string_value = "True"
    param.save()
    # st_blocks=0 => the file occupies no blocks on disk, i.e. it has
    # been migrated offline by the HSM.
    mock_stat.return_value = Stats(st_size=10000, st_blocks=0,
                                   st_mtime=datetime.now())

    update_df_status()

    params = DatafileParameter.objects.filter(
        parameterset__schema=schema, parameterset__datafile=df1)
    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(params.count(), 1)
    self.assertEqual(params[0].string_value, "False")
def savep(paramk, paramv):
    """Persist one datafile parameter (closure over schema/param_set),
    skipping empty values."""
    param_name = ParameterName.objects.get(schema__id=schema.id,
                                           name=paramk)
    if paramv == "":
        # Nothing to store for empty values.
        return
    parameter = DatafileParameter(parameterset=param_set, name=param_name)
    if param_name.isNumeric():
        parameter.numerical_value = paramv
    else:
        parameter.string_value = paramv
    parameter.save()
def create_df_status(datafile, schema_name, min_file_size):
    """Post-save celery task that checks online status of new file and
    create HSM metadata to track online status.

    Parameters
    ----------
    datafile: DataFile
        datafile to check and create online/offline status metadata for
    schema_name: Schema
        name of Schema which describes ParameterNames
    min_file_size : int
        minimum size of files to check HSM status of. This param is
        simply passed on to df_online.

    Returns
    -------
    None
    """
    # Guard clause: unverified files cannot be checked.
    if not datafile.verified:
        LOGGER.warning("""Cannot determine online/offline status for datafile %s " "is not verified""", datafile.id)
        return

    with DatafileLock(datafile, "datafile-%s" % datafile.id) as lock:
        if not lock:
            return
        schema = Schema.objects.get(namespace=schema_name)
        if DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).exists():
            LOGGER.debug("""HSM DatafileParameterSet already exists for: %s""", datafile.id)
            return
        param_set = DatafileParameterSet(schema=schema, datafile=datafile)
        param_set.save()
        online_name = ParameterName.objects.get(schema=schema,
                                                name="online")
        online_param = DatafileParameter(parameterset=param_set,
                                         name=online_name)
        online_param.string_value = str(df_online(datafile, min_file_size))
        online_param.save()
def get_parse_status(squash_datafile, ns):
    """Return the 'parse_status' DatafileParameter for *squash_datafile*
    under schema namespace *ns*, creating one with value 'new' if the
    parameter does not exist yet.
    """
    from tardis.tardis_portal.models import DatafileParameter
    try:
        status = squash_datafile.datafileparameterset_set.get(
            schema__namespace=ns
        ).datafileparameter_set.get(
            name__name='parse_status')
    except DatafileParameter.DoesNotExist:
        # NOTE(review): if no parameter SET exists for this schema, the
        # outer .get() raises DatafileParameterSet.DoesNotExist, which is
        # NOT caught here — presumably callers guarantee the set exists;
        # verify against callers.
        from tardis.tardis_portal.models import (Schema, ParameterName)
        schema = Schema.objects.get(type=Schema.DATAFILE, namespace=ns)
        pn, created = ParameterName.objects.get_or_create(
            name='parse_status',
            schema=schema,
            data_type=ParameterName.STRING)
        ps = squash_datafile.datafileparameterset_set.get(schema=schema)
        status = DatafileParameter(parameterset=ps, name=pn,
                                   string_value='new')
        status.save()
    return status
def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
    """update_df_status should skip files that are unverified"""
    # An unverified datafile: its DFO is created without verified=True.
    datafile = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    datafile.save()
    file_obj = DataFileObject(datafile=datafile, storage_box=self.sbox1,
                              uri="stream/test_df2.jpg")
    file_obj.save()
    hsm_schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    parameter_set = DatafileParameterSet(schema=hsm_schema,
                                         datafile=datafile)
    parameter_set.save()
    online_name = ParameterName.objects.get(schema=hsm_schema,
                                            name="online")
    online_param = DatafileParameter(parameterset=parameter_set,
                                     name=online_name)
    online_param.string_value = True
    online_param.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())

    update_df_status()

    # The online check must never run for unverified files.
    df_online.assert_not_called()
def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.

    Returns the existing DatafileParameterSet if one is already present,
    otherwise creates one and populates it. List-valued metadata entries
    produce one parameter per non-empty line (in reverse order),
    skipping known Bioformats/DM3 reader progress lines.
    """
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None

    # Bioformats/Gatan reader progress lines that are not real metadata.
    # A set is the idiomatic membership container (the original used a
    # dict mapping each line to None).
    exclude_lines = {
        '-----',
        'Reading global metadata',
        'Reading metadata',
        'Reading core metadata',
        'Populating metadata',
        'Reading tags',
        'Verifying Gatan format',
        'Initializing reader',
        'Checking file format [Gatan Digital Micrograph]',
    }

    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name not in metadata:
            continue
        if p.isNumeric():
            # Numeric parameters with empty-string values are skipped.
            if metadata[p.name] != '':
                dfp = DatafileParameter(parameterset=ps, name=p)
                dfp.numerical_value = metadata[p.name]
                dfp.save()
        elif isinstance(metadata[p.name], list):
            for val in reversed(metadata[p.name]):
                strip_val = val.strip()
                # "not x in" replaced with the idiomatic "x not in".
                if strip_val and strip_val not in exclude_lines:
                    dfp = DatafileParameter(parameterset=ps, name=p)
                    dfp.string_value = strip_val
                    dfp.save()
        else:
            dfp = DatafileParameter(parameterset=ps, name=p)
            dfp.string_value = metadata[p.name]
            dfp.save()
    return ps
def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set."""
    parameters = self.getParameters(schema, metadata)
    # Some/all? of these excludes below are specific to DM3 format:
    # NOTE(review): exclude_line is left EMPTY here, so the membership
    # test below never filters anything — the DM3 entries may have been
    # removed by mistake while keeping the filtering loop; confirm
    # against the DM3/Gatan variant of this method.
    exclude_line = dict()
    if not parameters:
        print "Bailing out of saveMetadata because of 'not parameters'."
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              datafile=instance)
        print "Parameter set already exists for %s, so we'll just " \
            "return it." % instance.filename
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, datafile=instance)
        ps.save()
    for p in parameters:
        print p.name
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                # Numeric parameters with empty-string values are skipped.
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            else:
                print p.name
                if isinstance(metadata[p.name], list):
                    # One parameter per non-empty, non-excluded line,
                    # stored in reverse order.
                    for val in reversed(metadata[p.name]):
                        strip_val = val.strip()
                        if strip_val:
                            if strip_val not in exclude_line:
                                dfp = DatafileParameter(parameterset=ps,
                                                        name=p)
                                dfp.string_value = strip_val
                                dfp.save()
                else:
                    dfp.string_value = metadata[p.name]
                    dfp.save()
    return ps
def saveMetadata(self, instance, schema, metadata): """Save all the metadata to a Dataset_Files parameter set. """ logger.error('Olympus-saveMetadata()') parameters = self.getParameters(schema, metadata) if not parameters: return None try: ps = DatafileParameterSet.objects.get(schema=schema, dataset_file=instance) return ps # if already exists then just return it except DatafileParameterSet.DoesNotExist: ps = DatafileParameterSet(schema=schema, dataset_file=instance) ps.save() for p in parameters: print p.name if p.name in metadata: dfp = DatafileParameter(parameterset=ps, name=p) if p.isNumeric(): if metadata[p.name] != '': dfp.numerical_value = metadata[p.name] dfp.save() else: print p.name if isinstance(metadata[p.name], list): for val in reversed(metadata[p.name]): strip_val = val.strip() if strip_val: if not strip_val in exclude_line: dfp = DatafileParameter(parameterset=ps, name=p) dfp.string_value = strip_val dfp.save() else: dfp.string_value = metadata[p.name] dfp.save() return ps
def run_bfconvert(bfconvert_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats bfconvert on an image file.

    Produces a PNG preview (first series/timepoint/channel/z-slice
    only), contrast-stretches it with ImageMagick, and records the
    preview's relative path in the datafile's 'previewImage' parameter.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # bfconvert operates on each datafile at a time.
    lock_id = 'bioformats-filter-bfconvert-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                prev_param = ParameterName.objects.get(
                    schema__id=schema_id, name='previewImage')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=prev_param).exists():
                    logger.info("Preview image already exists for df_id %d"
                                % df_id)
                    return
            outputextension = "png"
            # NOTE(review): assumes at least one verified DFO exists —
            # dfo would be None otherwise and dfo.uri would raise.
            dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                                verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH, preview_image_rel_file_path)
            if not os.path.exists(
                    os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))
            # Extract only the first image from the stack:
            cmdline = "'%s' -series 0 -timepoint 0 -channel 0 -z 0 " \
                "'%s' '%s' -overwrite" %\
                (bfconvert_path, inputfilename, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            os.rename(preview_image_file_path,
                      preview_image_file_path + '.bioformats')
            # Run ImageMagick convert with contrast-stretch on an
            # image file.
            # We could probably do this with the Wand Python module
            # instead.
            cmdline = "convert '%s.bioformats' -contrast-stretch 0 '%s'" %\
                (preview_image_file_path, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            os.unlink(preview_image_file_path + '.bioformats')
            if p.returncode != 0:
                logger.error(stdout)
                return
            try:
                ps = DatafileParameterSet.objects.get(
                    schema__id=schema_id, datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()
            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        finally:
            release_lock()
def test_parameter(self):
    # Builds an experiment / dataset / datafile hierarchy with one
    # FILENAME-typed ParameterName each, then checks that Parameter.get()
    # renders the expected image-link HTML for all three levels.
    exp = Experiment(
        title='test exp1',
        institution_name='Australian Synchrotron',
        approved=True,
        created_by=self.user,
        public_access=Experiment.PUBLIC_ACCESS_NONE,
    )
    exp.save()
    dataset = Dataset(description="dataset description")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()
    df_file = DataFile(dataset=dataset, filename='file.txt',
                       size=42, md5sum='bogus')
    df_file.save()
    df_schema = Schema(namespace='http://www.cern.ch/felzmann/schema1.xml',
                       type=Schema.DATAFILE)
    df_schema.save()
    ds_schema = Schema(namespace='http://www.cern.ch/felzmann/schema2.xml',
                       type=Schema.DATASET)
    ds_schema.save()
    exp_schema = Schema(
        namespace='http://www.cern.ch/felzmann/schema3.xml',
        type=Schema.EXPERIMENT)
    exp_schema.save()
    df_parname = ParameterName(schema=df_schema,
                               name='name',
                               full_name='full_name',
                               units='image/jpg',
                               data_type=ParameterName.FILENAME)
    df_parname.save()
    ds_parname = ParameterName(schema=ds_schema,
                               name='name',
                               full_name='full_name',
                               units='image/jpg',
                               data_type=ParameterName.FILENAME)
    ds_parname.save()
    exp_parname = ParameterName(schema=exp_schema,
                                name='name',
                                full_name='full_name',
                                units='image/jpg',
                                data_type=ParameterName.FILENAME)
    exp_parname.save()
    df_parset = DatafileParameterSet(schema=df_schema, datafile=df_file)
    df_parset.save()
    ds_parset = DatasetParameterSet(schema=ds_schema, dataset=dataset)
    ds_parset.save()
    exp_parset = ExperimentParameterSet(schema=exp_schema, experiment=exp)
    exp_parset.save()
    # Point METADATA_STORE_PATH at this test's directory so the stored
    # filename resolves to the bundled test.jpg fixture.
    with self.settings(METADATA_STORE_PATH=os.path.dirname(__file__)):
        filename = 'test.jpg'
        df_parameter = DatafileParameter(name=df_parname,
                                         parameterset=df_parset,
                                         string_value=filename)
        df_parameter.save()
        ds_parameter = DatasetParameter(name=ds_parname,
                                        parameterset=ds_parset,
                                        string_value=filename)
        ds_parameter.save()
        exp_parameter = ExperimentParameter(name=exp_parname,
                                            parameterset=exp_parset,
                                            string_value=filename)
        exp_parameter.save()
        self.assertEqual(
            "<a href='/display/DatafileImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatafileImage/load/%i/' /></a>" %  # noqa
            (df_parameter.id, df_parameter.id),
            df_parameter.get())
        self.assertEqual(
            "<a href='/display/DatasetImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatasetImage/load/%i/' /></a>" %  # noqa
            (ds_parameter.id, ds_parameter.id),
            ds_parameter.get())
        self.assertEqual(
            "<a href='/display/ExperimentImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/ExperimentImage/load/%i/' /></a>" %  # noqa
            (exp_parameter.id, exp_parameter.id),
            exp_parameter.get())
class ParameterSetManagerTestCase(TestCase):
    # Exercises ParameterSetManager against DatafileParameterSet,
    # covering STRING, NUMERIC, DATETIME and LINK parameter types.
    # NOTE: this file is Python 2 (octal-style `01` literals below).

    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42", md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        # type=3 is the datafile schema type — TODO confirm against the
        # Schema model's type constants.
        self.schema = Schema(
            namespace="http://localhost/psmtest/df/",
            name="Parameter Set Manager",
            type=3)
        self.schema.save()

        self.parametername1 = ParameterName(
            schema=self.schema,
            name="parameter1",
            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(
            schema=self.schema,
            name="parameter2",
            full_name="Parameter 2",
            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(
            schema=self.schema,
            name="parameter3",
            full_name="Parameter 3",
            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1,
            string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2,
            numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema, name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()

    def tearDown(self):
        self.exp.delete()
        self.user.delete()
        self.parametername1.delete()
        self.parametername2.delete()
        self.parametername3.delete()
        self.parametername_exp_link.delete()
        self.parametername_dataset_link.delete()
        self.parametername_unresolvable_link.delete()
        self.schema.delete()

    def test_existing_parameterset(self):
        # Wrapping an existing parameter set exposes its schema and
        # previously saved values.
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        self.assertTrue(psm.get_schema().namespace ==
                        "http://localhost/psmtest/df/")
        self.assertTrue(psm.get_param("parameter1").string_value ==
                        "test1")
        self.assertTrue(psm.get_param("parameter2", True) == 2)

    def test_new_parameterset(self):
        psm = ParameterSetManager(parentObject=self.datafile,
                                  schema="http://localhost/psmtest/df2/")

        self.assertTrue(psm.get_schema().namespace ==
                        "http://localhost/psmtest/df2/")

        psm.set_param("newparam1", "test3", "New Parameter 1")

        self.assertTrue(psm.get_param("newparam1").string_value ==
                        "test3")
        self.assertTrue(psm.get_param("newparam1").name.full_name ==
                        "New Parameter 1")

        psm.new_param("newparam1", "test4")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 2)

        psm.set_param_list("newparam2", ("a", "b", "c", "d"))

        self.assertTrue(len(psm.get_params("newparam2")) == 4)

        psm.set_params_from_dict(
            {"newparam2": "test5", "newparam3": 3})

        self.assertTrue(psm.get_param("newparam2", True) == "test5")

        # the newparam3 gets created and '3' is set to a string_value
        # since one cannot assume that an initial numeric value
        # will imply continuing numeric type for this new param
        self.assertTrue(psm.get_param("newparam3").string_value == '3')

        psm.delete_params("newparam1")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 0)

    def test_link_parameter_type(self):
        """
        Test that Parameter.link_gfk (GenericForeignKey) is correctly
        assigned after using Parameter.set_value(some_url) for a LINK
        Parameter.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        # Check link to experiment
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.assertTrue(psm.get_param("exp_link").string_value ==
                        exp_url)
        self.assertTrue(psm.get_param("exp_link").link_id ==
                        self.exp.id)
        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)
        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

        # Check link to dataset
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.assertTrue(psm.get_param("dataset_link").string_value ==
                        dataset_url)
        self.assertTrue(psm.get_param("dataset_link").link_id ==
                        self.dataset.id)
        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct ==
                        dataset_ct)
        self.assertTrue(psm.get_param("dataset_link").link_gfk ==
                        self.dataset)

    def test_link_parameter_type_extra(self):
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(psm.get_param("dataset_link").link_id ==
                        self.dataset.id)
        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct ==
                        dataset_ct)
        self.assertTrue(psm.get_param("dataset_link").link_gfk ==
                        self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)
        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)
        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model
        (including non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)
        self.assertRaises(SuspiciousOperation,
                          lambda: self.freetext_link_param.set_value(
                              "FREETEXT_ID_123"))

    def test_tz_naive_date_handling(self):
        """
        Ensure that dates are handling in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3", str(datetime(1970, 01, 01, 10, 0, 0)))

        # Naive local datetime is converted to UTC on retrieval.
        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 01, 01, 0, 0, 0, tzinfo=pytz.utc))

    def test_tz_aware_date_handling(self):
        """
        Ensure that dates are handling in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3",
                      '1970-01-01T08:00:00+08:00')

        # +08:00 offset normalised to the equivalent UTC instant.
        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 01, 01, 0, 0, 0, tzinfo=pytz.utc))
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats showinf to extract metadata.

    Stores each useful line of showinf's output as a separate
    'image_information' DatafileParameter.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'bioformats-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                info_param = ParameterName.objects.get(
                    schema__id=schema_id, name='image_information')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=info_param).exists():
                    logger.info("Metadata already exists for df_id %d"
                                % df_id)
                    return
            cmdline = "'%s' '%s' -nopix" % (showinf_path, inputfilename)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            # NOTE(review): the first 11 output lines are assumed to be
            # header/progress noise — confirm for the Bioformats version
            # in use.
            image_info_list = stdout.split('\n')[11:]
            # Some/all? of these excludes below are specific to DM3 format:
            exclude_line = dict()
            exclude_line['-----'] = None
            exclude_line['Reading global metadata'] = None
            exclude_line['Reading metadata'] = None
            exclude_line['Reading core metadata'] = None
            exclude_line['Populating metadata'] = None
            exclude_line['Reading tags'] = None
            exclude_line['Verifying Gatan format'] = None
            exclude_line['Initializing reader'] = None
            exclude_line['Checking file format [Gatan Digital Micrograph]'] = \
                None
            try:
                ps = DatafileParameterSet.objects.get(
                    schema__id=schema_id, datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()
            param_name = ParameterName.objects.get(
                schema__id=schema_id, name='image_information')
            # One parameter per non-empty, non-excluded line, stored in
            # reverse order.
            for val in reversed(image_info_list):
                strip_val = val.strip()
                if strip_val:
                    if strip_val not in exclude_line:
                        dfp = DatafileParameter(parameterset=ps,
                                                name=param_name)
                        dfp.string_value = strip_val
                        dfp.save()
        finally:
            release_lock()
def run_fcsplot(fcsplot_path, inputfilename, df_id, schema_id):
    """
    Run fcsplot on a FCS file.

    Generates a PNG preview for the datafile and records its relative
    path in the 'previewImage' parameter. A cache-based lock ensures
    only one task works on a given datafile at a time.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # fcsplot operates on each datafile at a time.
    lock_id = 'fcs-filter-fcsplot-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                prev_param = ParameterName.objects.get(
                    schema__id=schema_id, name='previewImage')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=prev_param).exists():
                    logger.info("FCS preview already exists for df_id %d"
                                % df_id)
                    return
            outputextension = "png"
            dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                                verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH, preview_image_rel_file_path)
            if not os.path.exists(os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))
            cmdline = "'%s' '%s' '%s' '%s'" % \
                (sys.executable, fcsplot_path, inputfilename,
                 preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            try:
                ps = DatafileParameterSet.objects.get(
                    schema__id=schema_id, datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()
            param_name = ParameterName.objects.get(
                schema__id=schema_id, name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        except Exception:
            # BUG FIX: was a bare "except:", which also swallows
            # SystemExit and KeyboardInterrupt; catch Exception instead.
            logger.error(traceback.format_exc())
        finally:
            release_lock()
class ParameterSetManagerTestCase(TestCase):
    """
    Tests for ParameterSetManager: reading/writing existing and new
    parameter sets, LINK-type parameters (resolvable and unresolvable),
    and timezone handling of DATETIME parameters.

    Note: the deprecated leading-zero integer literals (``01``) in the
    datetime calls have been replaced with plain ``1`` — identical value,
    but no longer relies on Python 2's octal-literal syntax.
    """

    def setUp(self):
        """Create a user, experiment, dataset, datafile and a schema with
        string/numeric/datetime/LINK parameter names plus an initial
        parameter set holding sample values."""
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42", md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(namespace="http://localhost/psmtest/df/",
                             name="Parameter Set Manager", type=3)
        self.schema.save()

        self.parametername1 = ParameterName(schema=self.schema,
                                            name="parameter1",
                                            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(schema=self.schema,
                                            name="parameter2",
                                            full_name="Parameter 2",
                                            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(schema=self.schema,
                                            name="parameter3",
                                            full_name="Parameter 3",
                                            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1, string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2, numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema, name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()

    def tearDown(self):
        """Delete the fixture objects created in setUp."""
        self.exp.delete()
        self.user.delete()
        self.parametername1.delete()
        self.parametername2.delete()
        self.parametername3.delete()
        self.parametername_exp_link.delete()
        self.parametername_dataset_link.delete()
        self.parametername_unresolvable_link.delete()
        self.schema.delete()

    def test_existing_parameterset(self):
        """A PSM wrapping an existing parameter set exposes its schema
        and stored string/numeric values."""
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        self.assertTrue(
            psm.get_schema().namespace == "http://localhost/psmtest/df/")

        self.assertTrue(psm.get_param("parameter1").string_value == "test1")

        self.assertTrue(psm.get_param("parameter2", True) == 2)

    def test_new_parameterset(self):
        """Creating a PSM with a new schema namespace supports setting,
        listing, bulk-setting and deleting parameters."""
        psm = ParameterSetManager(parentObject=self.datafile,
                                  schema="http://localhost/psmtest/df2/")

        self.assertTrue(
            psm.get_schema().namespace == "http://localhost/psmtest/df2/")

        psm.set_param("newparam1", "test3", "New Parameter 1")

        self.assertTrue(psm.get_param("newparam1").string_value == "test3")

        self.assertTrue(
            psm.get_param("newparam1").name.full_name == "New Parameter 1")

        psm.new_param("newparam1", "test4")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 2)

        psm.set_param_list("newparam2", ("a", "b", "c", "d"))

        self.assertTrue(len(psm.get_params("newparam2")) == 4)

        psm.set_params_from_dict({"newparam2": "test5", "newparam3": 3})

        self.assertTrue(psm.get_param("newparam2", True) == "test5")

        # the newparam3 gets created and '3' is set to a string_value
        # since once cannot assume that an initial numeric value
        # will imply continuing numeric type for this new param
        self.assertTrue(psm.get_param("newparam3").string_value == '3')

        psm.delete_params("newparam1")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 0)

    def test_link_parameter_type(self):
        """
        Test that Parameter.link_gfk (GenericForeignKey) is correctly
        assigned after using Parameter.set_value(some_url) for a LINK
        Parameter.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        # Check link to experiment
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.assertTrue(psm.get_param("exp_link").string_value == exp_url)

        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

        # Check link to dataset
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.assertTrue(
            psm.get_param("dataset_link").string_value == dataset_url)

        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

    def test_link_parameter_type_extra(self):
        """LINK parameters resolve URL variants: no trailing slash and
        /api/v1/... style resource URLs."""
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model
        (including non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)
        self.assertRaises(
            SuspiciousOperation,
            lambda: self.freetext_link_param.set_value("FREETEXT_ID_123"))

    def test_tz_naive_date_handling(self):
        """
        Ensure that dates are handling in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3", str(datetime(1970, 1, 1, 10, 0, 0)))

        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc))

    def test_tz_aware_date_handling(self):
        """
        Ensure that dates are handling in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3", '1970-01-01T08:00:00+08:00')

        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc))
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run showinf on an FCS file to extract metadata.

    Parses the 'File' and 'Date' fields and the parameters-and-stains
    table from the showinf output and saves them as string-valued
    datafile parameters in the schema identified by ``schema_id``.

    :param showinf_path: filesystem path of the showinf script
    :param inputfilename: path of the FCS file to inspect
    :param df_id: DataFile primary key
    :param schema_id: Schema primary key for the FCS metadata schema
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'fcs-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists, which gives us an
    # atomic test-and-set suitable for locking.
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if not acquire_lock():
        # Another worker is already processing this datafile.
        return
    try:
        schema = Schema.objects.get(id=schema_id)
        datafile = DataFile.objects.get(id=df_id)
        ps = DatafileParameterSet.objects.filter(schema=schema,
                                                 datafile=datafile).first()
        if ps:
            file_param = ParameterName.objects.get(schema__id=schema_id,
                                                   name='file')
            if DatafileParameter.objects.filter(parameterset=ps,
                                                name=file_param).exists():
                logger.info("FCS metadata already exists for df_id %d"
                            % df_id)
                return

        # BUG FIX: previously the command was built by single-quoting the
        # paths into a shell=True string, which breaks (and is injectable)
        # for filenames containing quotes or shell metacharacters.  Passing
        # an argument list with shell=False avoids the shell entirely.
        cmdline = [sys.executable, showinf_path, inputfilename]
        logger.info(str(cmdline))
        p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        stdout, _ = p.communicate()
        if p.returncode != 0:
            logger.error(stdout)
            return

        metadata = _parse_showinf_output(stdout)

        # Reuse an existing parameter set for this schema/datafile pair,
        # or create one if this is the first filter run.
        try:
            ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                  datafile__id=df_id)
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema, datafile=datafile)
            ps.save()

        for param_name_str in ('file', 'date', 'parametersAndStainsTable'):
            try:
                param_name = ParameterName.objects.get(
                    schema__id=schema_id, name=param_name_str)
            except ParameterName.DoesNotExist:
                logger.error("Didn't find parameter %s in schema id %s" %
                             (param_name_str, schema_id))
                # BUG FIX: previously execution fell through here and
                # saved against a stale param_name from the previous
                # iteration (NameError on the first iteration).
                continue
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = metadata[param_name_str]
            dfp.save()
    except Exception:
        # Was a bare ``except:``; narrowed to Exception so SystemExit /
        # KeyboardInterrupt still propagate.  Failures are logged rather
        # than raised so the celery task terminates cleanly.
        logger.error(traceback.format_exc())
    finally:
        release_lock()


def _parse_showinf_output(stdout):
    """
    Parse showinf's stdout into a metadata dict.

    Returns a dict with keys 'file', 'date' (from "File: ..." / "Date: ..."
    lines) and 'parametersAndStainsTable' (the concatenated lines found
    between <ParametersAndStains> and </ParametersAndStains> markers).
    Missing fields are left as empty strings.
    """
    metadata = {
        'file': "",
        'date': "",
        'parametersAndStainsTable': ""}

    reading_table = False
    for line in stdout.split('\n'):
        m = re.match("File: (.*)", line)
        if m:
            metadata['file'] = m.group(1)
        m = re.match("Date: (.*)", line)
        if m:
            metadata['date'] = m.group(1)
        if line.strip() == "<ParametersAndStains>":
            reading_table = True
        elif line.strip() == "</ParametersAndStains>":
            reading_table = False
        elif reading_table:
            metadata['parametersAndStainsTable'] += line

    return metadata