def saveFlexstationMetadata(self, instance, schema, metadata):
    """Saves or overwrites the datafile's metadata to a Dataset_Files
    parameter set in the database.
    """
    logger.info('Saving Metadata')
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if it already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            else:
                dfp.string_value = metadata[p.name].decode('cp1252')
                dfp.save()
    return ps
def test_000_update_df_status_offline(self, mock_stat):
    """update_df_status should check the online status of preferred DFOs
    for all previously online datafiles and update the online Parameter
    to 'False' for any offline files."""
    df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
    df1.save()
    dfo1 = DataFileObject(datafile=df1,
                          storage_box=self.sbox1,
                          uri="stream/test.jpg",
                          verified=True)
    dfo1.save()

    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps = DatafileParameterSet(schema=schema, datafile=df1)
    ps.save()

    param_name = ParameterName.objects.get(schema=schema, name="online")
    param = DatafileParameter(parameterset=ps, name=param_name)
    param.string_value = "True"  # the online flag is stored as a string
    param.save()

    # st_blocks=0 makes the preferred DFO appear offline
    mock_stat.return_value = Stats(st_size=10000, st_blocks=0,
                                   st_mtime=datetime.now())

    update_df_status()

    params = DatafileParameter.objects.filter(
        parameterset__schema=schema, parameterset__datafile=df1)
    self.assertEquals(params.count(), 1)
    self.assertEquals(params[0].string_value, "False")
def saveDicomMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.
    """
    # FIXME re-enable this...
    # parameters = self.getParameters(schema, metadata)
    # if not parameters:
    #     logger.debug('dicompng saveDicomMetadata: parameters == NULL :-(')
    #     return None
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    logger.debug('dicompng saveDicomMetadata...')
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if it already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()
    try:
        # Save the whole metadata dump as a single string parameter
        dfp = DatafileParameter(parameterset=ps,
                                name=ParameterName.objects.get(name='dump'))
        dfp.string_value = metadata
        dfp.save()
    except Exception, e:
        logger.debug('dicompng saveDicomMetadata failed: ' + str(e))
        return None
def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
    """update_df_status should skip any files that have previously been
    marked as offline."""
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2,
                          storage_box=self.sbox1,
                          uri="stream/test_df2.jpg",
                          verified=True)
    dfo2.save()
    # df2.verify()

    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()

    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    param2.string_value = "False"  # the online flag is stored as a string
    param2.save()

    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())

    update_df_status()

    # assert that the df_online method wasn't called
    self.assertEquals(mock_df_online.call_count, 0)
def saveExifMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.
    """
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if it already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                dfp.numerical_value = metadata[p.name]
            else:
                dfp.string_value = metadata[p.name]
            dfp.save()
    return ps
def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.
    """
    parameters = self.getParameters(schema, metadata)
    # Showinf status lines that should not be stored as metadata values;
    # some/all of these are specific to the DM3 (Gatan) format:
    exclude_line = {
        '-----': None,
        'Reading global metadata': None,
        'Reading metadata': None,
        'Reading core metadata': None,
        'Populating metadata': None,
        'Reading tags': None,
        'Verifying Gatan format': None,
        'Initializing reader': None,
        'Checking file format [Gatan Digital Micrograph]': None,
    }
    if not parameters:
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if it already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            elif isinstance(metadata[p.name], list):
                # Save each non-excluded line as its own parameter
                for val in reversed(metadata[p.name]):
                    strip_val = val.strip()
                    if strip_val and strip_val not in exclude_line:
                        dfp = DatafileParameter(parameterset=ps, name=p)
                        dfp.string_value = strip_val
                        dfp.save()
            else:
                dfp.string_value = metadata[p.name]
                dfp.save()
    return ps
def create_df_status(datafile, schema_name, min_file_size):
    """Post-save celery task that checks the online status of a new file
    and creates HSM metadata to track its online status.

    Parameters
    ----------
    datafile: DataFile
        datafile to check and create online/offline status metadata for
    schema_name: str
        name of the Schema which describes the ParameterNames
    min_file_size : int
        minimum size of files to check HSM status of. This param is
        simply passed on to df_online.

    Returns
    -------
    None
    """
    if datafile.verified:
        with DatafileLock(datafile, "datafile-%s" % datafile.id) as lock:
            if lock:
                schema = Schema.objects.get(namespace=schema_name)
                if DatafileParameterSet.objects.filter(
                        schema=schema, datafile=datafile).exists():
                    LOGGER.debug(
                        "HSM DatafileParameterSet already exists for: %s",
                        datafile.id)
                    return
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()
                param_name = ParameterName.objects.get(
                    schema=schema, name="online")
                dfp = DatafileParameter(parameterset=ps, name=param_name)
                dfp.string_value = str(df_online(datafile, min_file_size))
                dfp.save()
    else:
        LOGGER.warning(
            "Cannot determine online/offline status: datafile %s "
            "is not verified", datafile.id)
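# Hedged usage sketch for create_df_status: typically invoked once a datafile
# has been verified. HSM_DATAFILE_NAMESPACE matches the tests in this section;
# `new_datafile` is a placeholder DataFile instance and the 500-byte threshold
# is an illustrative value passed straight through to df_online.
create_df_status(new_datafile, HSM_DATAFILE_NAMESPACE, min_file_size=500)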
def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.
    """
    parameters = self.getParameters(schema, metadata)
    # Some/all? of these excludes below are specific to DM3 format:
    exclude_line = dict()
    if not parameters:
        print "Bailing out of saveMetadata because of 'not parameters'."
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              datafile=instance)
        print "Parameter set already exists for %s, so we'll just " \
            "return it." % instance.filename
        return ps
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, datafile=instance)
        ps.save()

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            elif isinstance(metadata[p.name], list):
                for val in reversed(metadata[p.name]):
                    strip_val = val.strip()
                    if strip_val and strip_val not in exclude_line:
                        dfp = DatafileParameter(parameterset=ps, name=p)
                        dfp.string_value = strip_val
                        dfp.save()
            else:
                dfp.string_value = metadata[p.name]
                dfp.save()
    return ps
def saveMetadata(self, instance, schema, metadata):
    """Save all the metadata to a Dataset_Files parameter set.
    """
    logger.error('Olympus-saveMetadata()')
    parameters = self.getParameters(schema, metadata)
    # Status lines to skip when saving list values; none are known for
    # Olympus output, but the dict must exist for the lookup below.
    exclude_line = dict()
    if not parameters:
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if it already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()

    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            elif isinstance(metadata[p.name], list):
                for val in reversed(metadata[p.name]):
                    strip_val = val.strip()
                    if strip_val and strip_val not in exclude_line:
                        dfp = DatafileParameter(parameterset=ps, name=p)
                        dfp.string_value = strip_val
                        dfp.save()
            else:
                dfp.string_value = metadata[p.name]
                dfp.save()
    return ps
def register_squashfile(exp_id, epn, sq_dir, sq_filename, namespace):
    '''
    example: register_squashfile(456, '1234A', '/srv/squashstore',
                                 '1234A.squashfs',
                                 'http://synchrotron.org.au/mx/squashfsarchive/1')
    '''
    dfs = DataFile.objects.filter(filename=sq_filename,
                                  dataset__experiments__id=exp_id)
    if len(dfs) == 1:
        return dfs[0]  # already registered

    e = Experiment.objects.get(id=exp_id)
    ds = Dataset(description="01 SquashFS Archive")
    ds.save()
    ds.experiments.add(e)

    filepath = os.path.join(sq_dir, sq_filename)
    try:
        with open(filepath + '.md5sum', 'r') as md5file:
            md5sum = md5file.read().strip()[:32]
    except IOError:
        print 'no md5sum file found'
        return None

    size = os.path.getsize(filepath)
    df = DataFile(md5sum=md5sum, filename=sq_filename,
                  size=str(size), dataset=ds)
    df.save()

    schema = Schema.objects.filter(namespace=namespace)[0]
    ps = DatafileParameterSet(schema=schema, datafile=df)
    ps.save()
    ps.set_param('EPN', epn)

    sbox = StorageBox.objects.get(name='squashstore')
    dfo = DataFileObject(storage_box=sbox, datafile=df, uri=sq_filename)
    dfo.save()
    return df
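# Usage sketch mirroring the docstring example above. The call returns the
# existing DataFile if the archive was registered previously, and None when
# no companion .md5sum file exists alongside the squashfs image.
df = register_squashfile(456, '1234A', '/srv/squashstore',
                         '1234A.squashfs',
                         'http://synchrotron.org.au/mx/squashfsarchive/1')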
def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
    """update_df_status should skip files that are unverified"""
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2,
                          storage_box=self.sbox1,
                          uri="stream/test_df2.jpg")
    dfo2.save()

    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()

    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    param2.string_value = "True"  # the online flag is stored as a string
    param2.save()

    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())

    update_df_status()

    df_online.assert_not_called()
def __init__(self, parameterset=None, parentObject=None, schema=None):
    """Instantiate a manager for an existing ParameterSet, or create a
    new ParameterSet for the given parent object and schema.

    :param parameterset: optional existing parameter set to manage
    :type parameterset: DatafileParameterSet, DatasetParameterSet or
        ExperimentParameterSet
    :param parentObject: parent object to create a new parameter set for
    :type parentObject: Dataset_File, Dataset or Experiment
    :param schema: Schema namespace
    :type schema: string
    """
    if parameterset:
        self.parameterset = parameterset
        self.schema = self.parameterset.schema
        self.namespace = self.schema.namespace

        if isinstance(self.parameterset, DatafileParameterSet):
            self.parameters = DatafileParameter.objects.filter(
                parameterset=self.parameterset).order_by('name__full_name')
            self.blank_param = DatafileParameter
        elif isinstance(self.parameterset, DatasetParameterSet):
            self.parameters = DatasetParameter.objects.filter(
                parameterset=self.parameterset).order_by('name__full_name')
            self.blank_param = DatasetParameter
        elif isinstance(self.parameterset, ExperimentParameterSet):
            self.parameters = ExperimentParameter.objects.filter(
                parameterset=self.parameterset).order_by('name__full_name')
            self.blank_param = ExperimentParameter
        else:
            raise TypeError("Invalid parameterset object given.")

    elif parentObject and schema:
        self.namespace = schema

        if isinstance(parentObject, Dataset_File):
            self.parameterset = DatafileParameterSet(
                schema=self.get_schema(), dataset_file=parentObject)
            self.parameterset.save()
            self.parameters = DatafileParameter.objects.filter(
                parameterset=self.parameterset)
            self.blank_param = DatafileParameter
        elif isinstance(parentObject, Dataset):
            self.parameterset = DatasetParameterSet(
                schema=self.get_schema(), dataset=parentObject)
            self.parameterset.save()
            self.parameters = DatasetParameter.objects.filter(
                parameterset=self.parameterset)
            self.blank_param = DatasetParameter
        elif isinstance(parentObject, Experiment):
            self.parameterset = ExperimentParameterSet(
                schema=self.get_schema(), experiment=parentObject)
            self.parameterset.save()
            self.parameters = ExperimentParameter.objects.filter(
                parameterset=self.parameterset)
            self.blank_param = ExperimentParameter
        else:
            raise TypeError("Invalid parent object. "
                            "Must be an experiment/dataset/datafile, not " +
                            str(type(parentObject)))
    else:
        raise TypeError("Missing arguments")
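# Hedged usage sketch for the constructor above, mirroring the tests that
# follow. `existing_parameterset` and `datafile` are placeholder objects,
# and the schema namespace URL is illustrative.
psm = ParameterSetManager(parameterset=existing_parameterset)
psm = ParameterSetManager(parentObject=datafile,
                          schema="http://localhost/psmtest/df2/")
psm.set_param("newparam1", "test3", "New Parameter 1")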
class ParameterSetManagerTestCase(TestCase):

    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42", md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(namespace="http://localhost/psmtest/df/",
                             name="Parameter Set Manager",
                             type=3)
        self.schema.save()

        self.parametername1 = ParameterName(schema=self.schema,
                                            name="parameter1",
                                            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(schema=self.schema,
                                            name="parameter2",
                                            full_name="Parameter 2",
                                            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(schema=self.schema,
                                            name="parameter3",
                                            full_name="Parameter 3",
                                            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1, string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2, numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is an experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName of type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema, name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()

    def tearDown(self):
        self.exp.delete()
        self.user.delete()
        self.parametername1.delete()
        self.parametername2.delete()
        self.parametername3.delete()
        self.parametername_exp_link.delete()
        self.parametername_dataset_link.delete()
        self.parametername_unresolvable_link.delete()
        self.schema.delete()

    def test_existing_parameterset(self):
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        self.assertTrue(
            psm.get_schema().namespace == "http://localhost/psmtest/df/")
        self.assertTrue(psm.get_param("parameter1").string_value == "test1")
        self.assertTrue(psm.get_param("parameter2", True) == 2)

    def test_new_parameterset(self):
        psm = ParameterSetManager(parentObject=self.datafile,
                                  schema="http://localhost/psmtest/df2/")

        self.assertTrue(
            psm.get_schema().namespace == "http://localhost/psmtest/df2/")

        psm.set_param("newparam1", "test3", "New Parameter 1")

        self.assertTrue(psm.get_param("newparam1").string_value == "test3")
        self.assertTrue(
            psm.get_param("newparam1").name.full_name == "New Parameter 1")

        psm.new_param("newparam1", "test4")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 2)

        psm.set_param_list("newparam2", ("a", "b", "c", "d"))

        self.assertTrue(len(psm.get_params("newparam2")) == 4)

        psm.set_params_from_dict({"newparam2": "test5", "newparam3": 3})

        self.assertTrue(psm.get_param("newparam2", True) == "test5")

        # newparam3 gets created and '3' is set as a string_value,
        # since one cannot assume that an initial numeric value
        # implies a continuing numeric type for this new param
        self.assertTrue(psm.get_param("newparam3").string_value == '3')

        psm.delete_params("newparam1")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 0)

    def test_link_parameter_type(self):
        """
        Test that Parameter.link_gfk (GenericForeignKey) is correctly
        assigned after using Parameter.set_value(some_url) for a LINK
        Parameter.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        # Check link to experiment
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.assertTrue(psm.get_param("exp_link").string_value == exp_url)
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)
        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)
        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

        # Check link to dataset
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.assertTrue(
            psm.get_param("dataset_link").string_value == dataset_url)
        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)
        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)
        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

    def test_link_parameter_type_extra(self):
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)
        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)
        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)
        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)
        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model
        (including non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)
        self.assertRaises(
            SuspiciousOperation,
            lambda: self.freetext_link_param.set_value("FREETEXT_ID_123"))

    def test_tz_naive_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)
        psm.new_param("parameter3", str(datetime(1970, 1, 1, 10, 0, 0)))
        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc))

    def test_tz_aware_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)
        psm.new_param("parameter3", '1970-01-01T08:00:00+08:00')
        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc))
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats showinf to extract metadata.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'bioformats-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                info_param = ParameterName.objects.get(
                    schema__id=schema_id, name='image_information')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=info_param).exists():
                    logger.info("Metadata already exists for df_id %d"
                                % df_id)
                    return

            cmdline = "'%s' '%s' -nopix" % (showinf_path, inputfilename)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            # Skip showinf's banner; the metadata proper starts at line 12
            image_info_list = stdout.split('\n')[11:]

            # Some/all? of these excludes below are specific to DM3 format:
            exclude_line = {
                '-----': None,
                'Reading global metadata': None,
                'Reading metadata': None,
                'Reading core metadata': None,
                'Populating metadata': None,
                'Reading tags': None,
                'Verifying Gatan format': None,
                'Initializing reader': None,
                'Checking file format [Gatan Digital Micrograph]': None,
            }

            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='image_information')
            for val in reversed(image_info_list):
                strip_val = val.strip()
                if strip_val and strip_val not in exclude_line:
                    dfp = DatafileParameter(parameterset=ps,
                                            name=param_name)
                    dfp.string_value = strip_val
                    dfp.save()
        finally:
            release_lock()
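# The locking idiom above relies on Django's cache.add() being atomic: it
# returns True only for the caller that created the key. A minimal isolated
# sketch of the same pattern (the cache alias and expiry value are
# assumptions, mirroring the filters in this section):
from django.core.cache import caches

def with_cache_lock(lock_id, work, expire=300):
    cache = caches['celery-locks']
    if cache.add(lock_id, 'true', expire):  # only one worker wins the add
        try:
            work()
        finally:
            cache.delete(lock_id)  # release so a later run can retry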
class ContextualViewTest(TestCase):

    def setUp(self):
        """
        setting up essential objects, copied from tests above
        """
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = self.user.userprofile
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 size=42, filename="foo",
                                 md5sum="junk")
        self.datafile.save()

        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View",
                                 type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()

        self.dfps = DatafileParameterSet(datafile=self.datafile,
                                         schema=self.testschema)
        self.dfps.save()

    def tearDown(self):
        self.user.delete()
        self.exp.delete()
        self.dataset.delete()
        self.datafile.delete()
        self.testschema.delete()
        self.dfps.delete()
        self.acl.delete()

    def testDetailsDisplay(self):
        """
        test display of view for an existing schema and no display for
        an undefined one.
        """
        from tardis.tardis_portal.views import display_datafile_details
        request = flexmock(user=self.user,
                           groups=[("testgroup", flexmock())])
        with self.settings(DATAFILE_VIEWS=[
                ("http://test.com/test/schema", "/test/url"),
                ("http://does.not.exist", "/false/url")]):
            response = display_datafile_details(
                request, datafile_id=self.datafile.id)
            self.assertEqual(response.status_code, 200)
            self.assertTrue("/ajax/parameters/" in response.content)
            self.assertTrue("/test/url" in response.content)
            self.assertFalse("/false/url" in response.content)
def process_meta(df_id, schema_name, overwrite=False, **kwargs):
    """Extract metadata from a DataFile using the get_meta function and
    save the outputs as DatafileParameters.

    Parameters
    ----------
    df_id: int
        ID of the DataFile instance to process.
    schema_name: str
        Name of the schema which describes the ParameterNames.
    overwrite: boolean (default: False)
        Specifies whether to overwrite any existing parametersets for
        this datafile.

    Returns
    -------
    None
    """
    from .metadata import get_meta
    if acquire_datafile_lock(df_id):
        # Need to start a JVM in each thread
        check_and_start_jvm()
        try:
            javabridge.attach()
            log4j.basic_config()
            schema = Schema.objects.get(namespace__exact=schema_name)
            df = DataFile.objects.get(id=df_id)
            if DatafileParameterSet.objects\
                    .filter(schema=schema, datafile=df).exists():
                if overwrite:
                    psets = DatafileParameterSet.objects.filter(
                        schema=schema, datafile=df)
                    logger.warning("Overwriting parametersets for %s",
                                   df.filename)
                    for ps in psets:
                        delete_old_parameterset(ps)
                else:
                    logger.warning("Parametersets for %s already exist.",
                                   df.filename)
                    return

            dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                                verified=True).first()
            input_file_path = dfo.get_full_path()

            logger.debug("Processing file: %s", input_file_path)
            metadata_params = get_meta(input_file_path, **kwargs)

            if not metadata_params:
                logger.debug("No metadata to save")
                return

            for sm in metadata_params:
                ps = DatafileParameterSet(schema=schema, datafile=df)
                ps.save()

                logger.debug("Saving parameters for: %s", input_file_path)
                save_parameters(schema, ps, sm)
        except Exception as err:
            logger.exception(err)
        finally:
            release_datafile_lock(df_id)
            javabridge.detach()
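# Hedged usage sketch: process_meta is safe to call repeatedly for the same
# datafile, since the lock and the existing-parameterset check make repeat
# calls no-ops unless overwrite=True. The datafile id and schema namespace
# below are placeholders.
process_meta(df_id=42,
             schema_name="http://example.org/bioformats/schema/1",
             overwrite=False)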
def process_meta_file_output(df_id, schema_name, overwrite=False, **kwargs):
    """Extract metadata from a DataFile using the get_meta function and
    save the outputs as DatafileParameters.

    This function differs from process_meta in that it generates an
    output directory in the metadata store and passes it to the metadata
    processing func so that outputs (e.g., preview images or metadata
    files) can be saved.

    Parameters
    ----------
    df_id: int
        ID of the DataFile instance to process.
    schema_name: str
        Name of the schema which describes the ParameterNames.
    overwrite: Boolean (default: False)
        Specifies whether to overwrite any existing parametersets for
        this datafile.

    Returns
    -------
    None
    """
    from .metadata import get_meta
    if acquire_datafile_lock(df_id):
        # Need to start a JVM in each thread
        check_and_start_jvm()
        try:
            javabridge.attach()
            log4j.basic_config()
            schema = Schema.objects.get(namespace__exact=schema_name)
            df = DataFile.objects.get(id=df_id)
            if DatafileParameterSet.objects\
                    .filter(schema=schema, datafile=df).exists():
                if overwrite:
                    psets = DatafileParameterSet.objects.filter(
                        schema=schema, datafile=df)
                    logger.warning("Overwriting parametersets for %s",
                                   df.filename)
                    for ps in psets:
                        delete_old_parameterset(ps)
                else:
                    logger.warning("Parametersets for %s already exist.",
                                   df.filename)
                    return

            dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                                verified=True).first()
            input_file_path = dfo.get_full_path()
            output_rel_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df.id))
            output_path = os.path.join(settings.METADATA_STORE_PATH,
                                       output_rel_path)
            if not os.path.exists(output_path):
                os.makedirs(output_path)

            logger.debug("Processing file: %s", input_file_path)
            metadata_params = get_meta(input_file_path, output_path,
                                       **kwargs)

            if not metadata_params:
                logger.debug("No metadata to save")
                return

            for sm in metadata_params:
                ps = DatafileParameterSet(schema=schema, datafile=df)
                ps.save()

                logger.debug("Saving parameters for: %s", input_file_path)
                save_parameters(schema, ps, sm)
        except Exception as err:
            logger.exception(err)
        finally:
            release_datafile_lock(df_id)
            javabridge.detach()
def process_meta(func, df, schema_name, overwrite=False, **kwargs):
    """Extract metadata from a DataFile using a provided function and
    save the outputs as DatafileParameters.

    Parameters
    ----------
    func: Function
        Function to extract metadata from a file. Function must have
        input_file_path as an argument, e.g.:
        def meta_proc(input_file_path, **kwargs): ...
        It must return a dict containing ParameterNames as keys and the
        Parameters to be saved as values. Parameters (values) can be
        singular strings/numerics or a list of strings/numerics. If it's
        a list, each element will be saved as a new DatafileParameter.
    df: tardis.tardis_portal.models.DataFile
        DataFile instance to process.
    schema_name: str
        Name of the schema which describes the ParameterNames.
    overwrite: boolean (default: False)
        Specifies whether to overwrite any existing parametersets for
        this datafile.

    Returns
    -------
    None
    """
    if acquire_datafile_lock(df.id):
        # Need to start a JVM in each thread
        check_and_start_jvm()
        try:
            javabridge.attach()
            log4j.basic_config()
            schema = Schema.objects.get(namespace__exact=schema_name)
            if DatafileParameterSet.objects\
                    .filter(schema=schema, datafile=df).exists():
                if overwrite:
                    psets = DatafileParameterSet.objects.filter(
                        schema=schema, datafile=df)
                    logger.warning("Overwriting parametersets for %s"
                                   % df.filename)
                    for ps in psets:
                        delete_old_parameterset(ps)
                else:
                    logger.warning("Parametersets for %s already exist."
                                   % df.filename)
                    return

            dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                                verified=True).first()
            input_file_path = dfo.get_full_path()

            logger.debug("Processing file: %s" % input_file_path)
            metadata_params = func(input_file_path, **kwargs)

            if not metadata_params:
                logger.debug("No metadata to save")
                return

            for sm in metadata_params:
                ps = DatafileParameterSet(schema=schema, datafile=df)
                ps.save()

                logger.debug("Saving parameters for: %s" % input_file_path)
                save_parameters(schema, ps, sm)
        except Exception, e:
            logger.debug(e)
        finally:
            # Always release the lock and detach from the JVM, as in the
            # other process_meta variants
            release_datafile_lock(df.id)
            javabridge.detach()
def run_fcsplot(fcsplot_path, inputfilename, df_id, schema_id):
    """
    Run fcsplot on an FCS file.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # fcsplot operates on each datafile at a time.
    lock_id = 'fcs-filter-fcsplot-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                prev_param = ParameterName.objects.get(
                    schema__id=schema_id, name='previewImage')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=prev_param).exists():
                    logger.info("FCS preview already exists for df_id %d"
                                % df_id)
                    return

            outputextension = "png"
            dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                                verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH, preview_image_rel_file_path)
            if not os.path.exists(os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))

            cmdline = "'%s' '%s' '%s' '%s'" % \
                (sys.executable, fcsplot_path, inputfilename,
                 preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return

            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        except Exception:
            logger.error(traceback.format_exc())
        finally:
            release_lock()
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run showinf on an FCS file to extract metadata.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'fcs-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                file_param = ParameterName.objects.get(
                    schema__id=schema_id, name='file')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=file_param).exists():
                    logger.info("FCS metadata already exists for df_id %d"
                                % df_id)
                    return

            cmdline = "'%s' '%s' '%s'" % \
                (sys.executable, showinf_path, inputfilename)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            image_info_list = stdout.split('\n')

            metadata = {
                'file': "",
                'date': "",
                'parametersAndStainsTable': ""}

            readingParametersAndStainsTable = False
            for line in image_info_list:
                m = re.match("File: (.*)", line)
                if m:
                    metadata['file'] = m.group(1)
                m = re.match("Date: (.*)", line)
                if m:
                    metadata['date'] = m.group(1)
                if line.strip() == "<ParametersAndStains>":
                    readingParametersAndStainsTable = True
                elif line.strip() == "</ParametersAndStains>":
                    readingParametersAndStainsTable = False
                elif readingParametersAndStainsTable:
                    metadata['parametersAndStainsTable'] += line

            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

            param_name_strings = ['file', 'date',
                                  'parametersAndStainsTable']
            for param_name_str in param_name_strings:
                try:
                    param_name = ParameterName.objects.get(
                        schema__id=schema_id, name=param_name_str)
                except ParameterName.DoesNotExist:
                    logger.error("Didn't find parameter %s in schema id %s"
                                 % (param_name_str, schema_id))
                    continue  # skip this parameter rather than crash
                dfp = DatafileParameter(parameterset=ps, name=param_name)
                dfp.string_value = metadata[param_name_str]
                dfp.save()
        except Exception:
            logger.error(traceback.format_exc())
        finally:
            release_lock()
def run_bfconvert(bfconvert_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats bfconvert on an image file.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # bfconvert operates on each datafile at a time.
    lock_id = 'bioformats-filter-bfconvert-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                prev_param = ParameterName.objects.get(
                    schema__id=schema_id, name='previewImage')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=prev_param).exists():
                    logger.info("Preview image already exists for df_id %d"
                                % df_id)
                    return

            outputextension = "png"
            dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                                verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH, preview_image_rel_file_path)
            if not os.path.exists(os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))

            # Extract only the first image from the stack:
            cmdline = "'%s' -series 0 -timepoint 0 -channel 0 -z 0 " \
                "'%s' '%s' -overwrite" % \
                (bfconvert_path, inputfilename, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return

            os.rename(preview_image_file_path,
                      preview_image_file_path + '.bioformats')

            # Run ImageMagick convert with contrast-stretch on the image.
            # We could probably do this with the Wand Python module instead.
            cmdline = "convert '%s.bioformats' -contrast-stretch 0 '%s'" % \
                (preview_image_file_path, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
            stdout, _ = p.communicate()
            os.unlink(preview_image_file_path + '.bioformats')
            if p.returncode != 0:
                logger.error(stdout)
                return

            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        finally:
            release_lock()
class ParameterSetManagerTestCase(TestCase): def setUp(self): from django.contrib.auth.models import User from tempfile import mkdtemp user = '******' pwd = 'secret' email = '' self.user = User.objects.create_user(user, email, pwd) self.test_dir = mkdtemp() self.exp = Experiment(title='test exp1', institution_name='monash', created_by=self.user) self.exp.save() self.dataset = Dataset(description="dataset description...") self.dataset.save() self.dataset.experiments.add(self.exp) self.dataset.save() self.datafile = DataFile(dataset=self.dataset, filename="testfile.txt", size="42", md5sum='bogus') self.datafile.save() self.dfo = DataFileObject( datafile=self.datafile, storage_box=self.datafile.get_default_storage_box(), uri="1/testfile.txt") self.dfo.save() self.schema = Schema( namespace="http://localhost/psmtest/df/", name="Parameter Set Manager", type=3) self.schema.save() self.parametername1 = ParameterName( schema=self.schema, name="parameter1", full_name="Parameter 1") self.parametername1.save() self.parametername2 = ParameterName( schema=self.schema, name="parameter2", full_name="Parameter 2", data_type=ParameterName.NUMERIC) self.parametername2.save() self.parametername3 = ParameterName( schema=self.schema, name="parameter3", full_name="Parameter 3", data_type=ParameterName.DATETIME) self.parametername3.save() self.datafileparameterset = DatafileParameterSet( schema=self.schema, datafile=self.datafile) self.datafileparameterset.save() self.datafileparameter1 = DatafileParameter( parameterset=self.datafileparameterset, name=self.parametername1, string_value="test1") self.datafileparameter1.save() self.datafileparameter2 = DatafileParameter( parameterset=self.datafileparameterset, name=self.parametername2, numerical_value=2) self.datafileparameter2.save() # Create a ParameterName and Parameter of type LINK to an experiment self.parametername_exp_link = ParameterName( schema=self.schema, name="exp_link", full_name="This parameter is a experiment LINK", data_type=ParameterName.LINK) self.parametername_exp_link.save() self.exp_link_param = DatafileParameter( parameterset=self.datafileparameterset, name=self.parametername_exp_link) exp_url = self.exp.get_absolute_url() # /experiment/view/1/ self.exp_link_param.set_value(exp_url) self.exp_link_param.save() # Create a ParameterName and Parameter of type LINK to a dataset self.parametername_dataset_link = ParameterName( schema=self.schema, name="dataset_link", full_name="This parameter is a dataset LINK", data_type=ParameterName.LINK) self.parametername_dataset_link.save() self.dataset_link_param = DatafileParameter( parameterset=self.datafileparameterset, name=self.parametername_dataset_link) dataset_url = self.dataset.get_absolute_url() # /dataset/1/ self.dataset_link_param.set_value(dataset_url) self.dataset_link_param.save() # Create a ParameterName type LINK to an unresolvable (non-URL) # free-text value self.parametername_unresolvable_link = ParameterName( schema=self.schema, name="freetext_link", full_name="This parameter is a non-URL LINK", data_type=ParameterName.LINK) self.parametername_unresolvable_link.save() def tearDown(self): self.exp.delete() self.user.delete() self.parametername1.delete() self.parametername2.delete() self.parametername3.delete() self.parametername_exp_link.delete() self.parametername_dataset_link.delete() self.parametername_unresolvable_link.delete() self.schema.delete() def test_existing_parameterset(self): psm = ParameterSetManager(parameterset=self.datafileparameterset) 
self.assertTrue(psm.get_schema().namespace == "http://localhost/psmtest/df/") self.assertTrue(psm.get_param("parameter1").string_value == "test1") self.assertTrue(psm.get_param("parameter2", True) == 2) def test_new_parameterset(self): psm = ParameterSetManager(parentObject=self.datafile, schema="http://localhost/psmtest/df2/") self.assertTrue(psm.get_schema().namespace == "http://localhost/psmtest/df2/") psm.set_param("newparam1", "test3", "New Parameter 1") self.assertTrue(psm.get_param("newparam1").string_value == "test3") self.assertTrue(psm.get_param("newparam1").name.full_name == "New Parameter 1") psm.new_param("newparam1", "test4") self.assertTrue(len(psm.get_params("newparam1", True)) == 2) psm.set_param_list("newparam2", ("a", "b", "c", "d")) self.assertTrue(len(psm.get_params("newparam2")) == 4) psm.set_params_from_dict( {"newparam2": "test5", "newparam3": 3}) self.assertTrue(psm.get_param("newparam2", True) == "test5") # the newparam3 gets created and '3' is set to a string_value # since once cannot assume that an initial numeric value # will imply continuing numeric type for this new param self.assertTrue(psm.get_param("newparam3").string_value == '3') psm.delete_params("newparam1") self.assertTrue(len(psm.get_params("newparam1", True)) == 0) def test_link_parameter_type(self): """ Test that Parameter.link_gfk (GenericForeignKey) is correctly assigned after using Parameter.set_value(some_url) for a LINK Parameter. """ psm = ParameterSetManager(parameterset=self.datafileparameterset) # Check link to experiment exp_url = self.exp.get_absolute_url() # /experiment/view/1/ self.assertTrue(psm.get_param("exp_link").string_value == exp_url) self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id) exp_ct = ContentType.objects.get(model__iexact="experiment") self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct) self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp) # Check link to dataset dataset_url = self.dataset.get_absolute_url() # /dataset/1/ self.assertTrue(psm.get_param("dataset_link").string_value == dataset_url) self.assertTrue(psm.get_param("dataset_link").link_id == self.dataset.id) dataset_ct = ContentType.objects.get(model__iexact="dataset") self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct) self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset) def test_link_parameter_type_extra(self): # make a second ParameterSet for testing some variations # in URL values self.datafileparameterset2 = DatafileParameterSet( schema=self.schema, datafile=self.datafile) self.datafileparameterset2.save() psm = ParameterSetManager(parameterset=self.datafileparameterset2) self.dataset_link_param2 = DatafileParameter( parameterset=self.datafileparameterset2, name=self.parametername_dataset_link) # /dataset/1 - no trailing slash dataset_url = self.dataset.get_absolute_url() self.dataset_link_param2.set_value(dataset_url) self.dataset_link_param2.save() # Check link_id/link_ct/link_gfk to dataset self.assertTrue(psm.get_param("dataset_link").link_id == self.dataset.id) dataset_ct = ContentType.objects.get(model__iexact="dataset") self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct) self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset) # Test links of the form /api/v1/experiment/<experiment_id>/ self.exp_link_param2 = DatafileParameter( parameterset=self.datafileparameterset2, name=self.parametername_exp_link) exp_url = '/api/v1/experiment/%s/' % self.exp.id self.exp_link_param2.set_value(exp_url) 
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)
        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)
        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model
        (including non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)

        self.assertRaises(SuspiciousOperation,
                          lambda: self.freetext_link_param.set_value(
                              "FREETEXT_ID_123"))

    def test_tz_naive_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)
        psm.new_param("parameter3", str(datetime(1970, 1, 1, 10, 0, 0)))
        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc))

    def test_tz_aware_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)
        psm.new_param("parameter3", '1970-01-01T08:00:00+08:00')
        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc))
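# A minimal sketch (not MyTardis code) of the conversion the two timezone
# tests above exercise, assuming the portal's local zone is UTC+10 (e.g.
# Australia/Melbourne, which is what the expected UTC-midnight values
# imply): naive datetimes are interpreted as local time, aware datetimes
# are converted from their stated offset, and both are normalised to UTC.
from datetime import datetime

import pytz
from dateutil.parser import parse

LOCAL_TZ = pytz.timezone('Australia/Melbourne')  # assumed local zone


def to_utc(value):
    """Parse a date string; localise naive values, then convert to UTC."""
    dt = parse(value)
    if dt.tzinfo is None:
        dt = LOCAL_TZ.localize(dt)
    return dt.astimezone(pytz.utc)


assert to_utc(str(datetime(1970, 1, 1, 10, 0, 0))) == \
    datetime(1970, 1, 1, 0, 0, tzinfo=pytz.utc)
assert to_utc('1970-01-01T08:00:00+08:00') == \
    datetime(1970, 1, 1, 0, 0, tzinfo=pytz.utc)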
def process_meta_file_output(func, df, schema_name, overwrite=False,
                             **kwargs):
    """Extract metadata from a DataFile using a provided function and
    save the outputs as DatafileParameters.

    This function differs from process_meta in that it generates an
    output directory in the metadata store and passes it to the metadata
    processing func, so that outputs (e.g., preview images or metadata
    files) can be saved.

    Parameters
    ----------
    func: Function
        Function to extract metadata from a file. The function must
        accept input_file_path and output_path as arguments, e.g.:

            def meta_proc(input_file_path, output_path, **kwargs):
                ...

        It must return a list of dicts with ParameterNames as keys and
        the Parameters to be saved as values (one DatafileParameterSet
        is created per dict). Parameter values can be singular
        strings/numerics or a list of strings/numerics. If a value is a
        list, each element will be saved as a new DatafileParameter.
    df: tardis.tardis_portal.models.DataFile
        DataFile instance to process.
    schema_name: str
        Name of the schema which describes the ParameterNames.
    overwrite: Boolean (default: False)
        Specifies whether to overwrite any existing parametersets for
        this datafile.

    Returns
    -------
    None
    """
    # Need to start a JVM in each thread
    global mtbf_jvm_started
    if not mtbf_jvm_started:
        logger.debug("Starting a new JVM")
        javabridge.start_vm(class_path=bioformats.JARS,
                            max_heap_size='4G',
                            run_headless=True)
        mtbf_jvm_started = True

    try:
        javabridge.attach()
        log4j.basic_config()

        schema = Schema.objects.get(namespace__exact=schema_name)
        if DatafileParameterSet.objects\
                .filter(schema=schema, datafile=df).exists():
            if overwrite:
                # Use .filter() rather than .get() so that every
                # matching parameterset is removed, not just one
                psets = DatafileParameterSet.objects.filter(
                    schema=schema, datafile=df)
                logger.warning("Overwriting parametersets for %s"
                               % df.filename)
                for ps in psets:
                    delete_old_parameterset(ps)
            else:
                logger.warning("Parametersets for %s already exist."
                               % df.filename)
                return

        dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                            verified=True).first()
        input_file_path = dfo.get_full_path()

        output_rel_path = os.path.join(
            os.path.dirname(input_file_path), str(df.id))
        output_path = os.path.join(settings.METADATA_STORE_PATH,
                                   output_rel_path)
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        logger.debug("Processing file: %s" % input_file_path)
        metadata_params = func(input_file_path, output_path, **kwargs)

        if not metadata_params:
            logger.debug("No metadata to save")
            return

        for sm in metadata_params:
            ps = DatafileParameterSet(schema=schema, datafile=df)
            ps.save()

            logger.debug("Saving parameters for: %s" % input_file_path)
            save_parameters(schema, ps, sm)
    except Exception as e:
        logger.debug(e)
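# A sketch (hypothetical, not part of the codebase) of a metadata
# processing function matching the contract process_meta_file_output
# expects: it accepts input_file_path and output_path, writes derived
# artefacts into output_path, and returns the parameters to save. The
# names 'previewImage' and 'fileSize' are illustrative only and would
# need matching ParameterNames in the target schema. A single dict is
# wrapped in a list because process_meta_file_output creates one
# DatafileParameterSet per element of the return value.
import os
import shutil


def example_meta_proc(input_file_path, output_path, **kwargs):
    # Derived output: copy the source file into the metadata store as a
    # stand-in for a rendered preview image.
    preview_path = os.path.join(output_path, 'preview.png')
    shutil.copyfile(input_file_path, preview_path)

    # Keys must correspond to ParameterNames in the target schema.
    return [{
        'previewImage': preview_path,
        'fileSize': os.path.getsize(input_file_path),
    }]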
class ContextualViewTest(TestCase):

    def setUp(self):
        """
        Set up essential objects (copied from the tests above).
        """
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)
        self.userProfile = UserProfile(user=self.user)
        self.userProfile.save()
        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()
        self.acl = ObjectACL(
            pluginId=django_user,
            entityId=str(self.user.id),
            content_object=self.exp,
            canRead=True,
            isOwner=True,
            aclOwnershipType=ObjectACL.OWNER_OWNED,
        )
        self.acl.save()
        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.dataset_file = Dataset_File(dataset=self.dataset,
                                         size=42,
                                         filename="foo",
                                         md5sum="junk")
        self.dataset_file.save()

        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View",
                                 type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()

        self.dfps = DatafileParameterSet(dataset_file=self.dataset_file,
                                         schema=self.testschema)
        self.dfps.save()

    def tearDown(self):
        self.user.delete()
        self.exp.delete()
        self.dataset.delete()
        self.dataset_file.delete()
        self.testschema.delete()
        self.dfps.delete()
        self.acl.delete()

    def testDetailsDisplay(self):
        """
        Test display of the view for an existing schema and no display
        for an undefined one.
        """
        from tardis.tardis_portal.views import display_datafile_details
        request = flexmock(user=self.user,
                           groups=[("testgroup", flexmock())])
        with self.settings(
                DATAFILE_VIEWS=[("http://test.com/test/schema", "/test/url"),
                                ("http://does.not.exist", "/false/url")]):
            response = display_datafile_details(
                request, dataset_file_id=self.dataset_file.id)
            self.assertEqual(response.status_code, 200)
            self.assertTrue("/ajax/parameters/" in response.content)
            self.assertTrue("/test/url" in response.content)
            self.assertFalse("/false/url" in response.content)
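# A sketch of the DATAFILE_VIEWS setting exercised by testDetailsDisplay
# above: each (namespace, URL) pair registers a contextual view, and
# display_datafile_details only renders links for namespaces that match a
# schema attached to one of the datafile's parametersets. The values here
# are the ones used in the test; a real deployment would substitute its
# own schema namespaces and view URLs.
DATAFILE_VIEWS = [
    ("http://test.com/test/schema", "/test/url"),  # matches self.testschema
    ("http://does.not.exist", "/false/url"),       # no matching schema
]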
    def test_parameter(self):
        exp = Experiment(
            title='test exp1',
            institution_name='Australian Synchrotron',
            approved=True,
            created_by=self.user,
            public_access=Experiment.PUBLIC_ACCESS_NONE,
        )
        exp.save()

        dataset = Dataset(description="dataset description")
        dataset.save()
        dataset.experiments.add(exp)
        dataset.save()

        df_file = DataFile(dataset=dataset,
                           filename='file.txt',
                           size=42,
                           md5sum='bogus')
        df_file.save()

        df_schema = Schema(
            namespace='http://www.cern.ch/felzmann/schema1.xml',
            type=Schema.DATAFILE)
        df_schema.save()
        ds_schema = Schema(
            namespace='http://www.cern.ch/felzmann/schema2.xml',
            type=Schema.DATASET)
        ds_schema.save()
        exp_schema = Schema(
            namespace='http://www.cern.ch/felzmann/schema3.xml',
            type=Schema.EXPERIMENT)
        exp_schema.save()

        df_parname = ParameterName(schema=df_schema,
                                   name='name',
                                   full_name='full_name',
                                   units='image/jpg',
                                   data_type=ParameterName.FILENAME)
        df_parname.save()
        ds_parname = ParameterName(schema=ds_schema,
                                   name='name',
                                   full_name='full_name',
                                   units='image/jpg',
                                   data_type=ParameterName.FILENAME)
        ds_parname.save()
        exp_parname = ParameterName(schema=exp_schema,
                                    name='name',
                                    full_name='full_name',
                                    units='image/jpg',
                                    data_type=ParameterName.FILENAME)
        exp_parname.save()

        df_parset = DatafileParameterSet(schema=df_schema, datafile=df_file)
        df_parset.save()
        ds_parset = DatasetParameterSet(schema=ds_schema, dataset=dataset)
        ds_parset.save()
        exp_parset = ExperimentParameterSet(schema=exp_schema,
                                            experiment=exp)
        exp_parset.save()

        with self.settings(METADATA_STORE_PATH=os.path.dirname(__file__)):
            filename = 'test.jpg'
            df_parameter = DatafileParameter(name=df_parname,
                                             parameterset=df_parset,
                                             string_value=filename)
            df_parameter.save()
            ds_parameter = DatasetParameter(name=ds_parname,
                                            parameterset=ds_parset,
                                            string_value=filename)
            ds_parameter.save()
            exp_parameter = ExperimentParameter(name=exp_parname,
                                                parameterset=exp_parset,
                                                string_value=filename)
            exp_parameter.save()

            self.assertEqual(
                "<a href='/display/DatafileImage/load/%i/' target='_blank'>"
                "<img style='width: 300px;' "
                "src='/display/DatafileImage/load/%i/' /></a>" %
                (df_parameter.id, df_parameter.id),
                df_parameter.get())
            self.assertEqual(
                "<a href='/display/DatasetImage/load/%i/' target='_blank'>"
                "<img style='width: 300px;' "
                "src='/display/DatasetImage/load/%i/' /></a>" %
                (ds_parameter.id, ds_parameter.id),
                ds_parameter.get())
            self.assertEqual(
                "<a href='/display/ExperimentImage/load/%i/' target='_blank'>"
                "<img style='width: 300px;' "
                "src='/display/ExperimentImage/load/%i/' /></a>" %
                (exp_parameter.id, exp_parameter.id),
                exp_parameter.get())