Example #1
    def saveDicomMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files paramamter set.
        """
        # FIXME reenable this...
        #parameters = self.getParameters(schema, metadata)
        #if not parameters:
        #    logger.debug('dicompng saveDicomMetadata: parameters == NULL :-(')
        #    return None

        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)
        logger.debug('dicompng saveDicomMetadata...')

        try:
            ps = DatafileParameterSet.objects.get(schema=schema, dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema, dataset_file=instance)
            ps.save()

        logger.debug('dicompng UP TO HERE, WHAT NEXT?')

        try:
            logger.debug('dicompng UP TO HERE2')
            dfp = DatafileParameter(parameterset=ps, name=ParameterName.objects.get(name='dump'))
            logger.debug('dicompng UP TO HERE3')
            dfp.string_value = metadata
            logger.debug('dicompng UP TO HERE4: ' + metadata)
            dfp.save()
            logger.debug('dicompng UP TO HERE5')
        except Exception as e:
            logger.debug('ZZZ' + str(e))
            return None
    def test_000_update_df_status_offline(self, mock_stat):
        """update_df_status should check the online status of
        preferred DFOs for all previously online datafiles and
        update online Parameter to 'False' for any offline files."""
        df1 = DataFile(dataset=self.dataset,
                       filename="test_df.jpg")
        df1.save()
        dfo1 = DataFileObject(datafile=df1,
                              storage_box=self.sbox1,
                              uri="stream/test.jpg",
                              verified=True)
        dfo1.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps = DatafileParameterSet(schema=schema, datafile=df1)
        ps.save()

        param_name = ParameterName.objects.get(schema=schema, name="online")
        param = DatafileParameter(parameterset=ps, name=param_name)
        param.string_value = True
        param.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=0,
                                       st_mtime=datetime.now())
        update_df_status()

        params = DatafileParameter.objects.filter(
            parameterset__schema=schema,
            parameterset__datafile=df1)

        self.assertEquals(params.count(), 1)
        self.assertEquals(params[0].string_value, "False")
    def saveSpectraMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files paramamter set.
        """
        parameters = self.getParamaters(schema, metadata)
        if not parameters:
            return None
        
        (ps, created) = DatafileParameterSet.objects.get_or_create(schema=schema, dataset_file=instance)
        if created: # new object was created
            ps.save()
        else: # if parameter set already exists then just return it
            return ps 

        # save datafile parameters
        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    dfp.numerical_value = metadata[p.name][0]
                else:
                    dfp.string_value = metadata[p.name][0]
                dfp.save()
                
        return ps
    def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
        """update_df_status should skip any files that have previously
        marked as offline."""
        df2 = DataFile(dataset=self.dataset,
                       filename="test_df2.jpg")
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test_df2.jpg",
                              verified=True)
        dfo2.save()
        # df2.verify()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps2 = DatafileParameterSet(schema=schema, datafile=df2)
        ps2.save()

        param_name = ParameterName.objects.get(schema=schema, name="online")
        param2 = DatafileParameter(parameterset=ps2, name=param_name)
        param2.string_value = False
        param2.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())
        update_df_status()

        # assert that the df_online method wasn't called
        self.assertEquals(mock_df_online.call_count, 0)
Example #5
    def saveExifMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files paramamter set.
        """
        parameters = self.getParamaters(schema, metadata)
        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    dfp.numerical_value = metadata[p.name]
                else:
                    dfp.string_value = metadata[p.name]
                dfp.save()
        return ps
    def saveDiffractionImageMetadata(self, instance, schema, metadata):
        """Save all the metadata to a DataFiles paramamter set.
        """
        parameters = self.getParameters(schema, metadata)
        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  datafile=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema, datafile=instance)
            ps.save()

        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps, name=p)
                if p.isNumeric():
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    dfp.string_value = metadata[p.name]
                    dfp.save()

        return ps
    def saveFlexstationMetadata(self, instance, schema, metadata):
        """Saves or overwrites the datafile's metadata to a Dataset_Files parameter set in the database.
        """
        logger.info('Saving Metadata')

        parameters = self.getParameters(schema, metadata)
        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema, dataset_file=instance)
            ps.save()

        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps, name=p)
                if p.isNumeric():
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    dfp.string_value = metadata[p.name].decode('cp1252')
                    dfp.save()

        return ps
Example #8
def create_df_status(datafile, schema_name, min_file_size):
    """Post-save celery task that checks online status of new file and create
    HSM metadata to track online status.

    Parameters
    ----------
    datafile : DataFile
        datafile to check and create online/offline status
        metadata for
    schema_name : str
        namespace of the Schema which describes the ParameterNames
    min_file_size : int
        minimum size of files to check HSM status of. This
        param is simply passed on to df_online.

    Returns
    -------
    None
    """
    if datafile.verified:
        with DatafileLock(datafile, "datafile-%s" % datafile.id) as lock:
            if lock:
                schema = Schema.objects.get(namespace=schema_name)
                if DatafileParameterSet.objects.filter(
                        schema=schema, datafile=datafile).exists():
                    LOGGER.debug(
                        """HSM DatafileParameterSet already exists for: %s""",
                        datafile.id
                    )
                    return

                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

                param_name = ParameterName.objects.get(
                    schema=schema,
                    name="online"
                )

                dfp = DatafileParameter(parameterset=ps, name=param_name)
                dfp.string_value = str(df_online(datafile, min_file_size))
                dfp.save()

    else:
        LOGGER.warning(
            "Cannot determine online/offline status: datafile %s "
            "is not verified",
            datafile.id
        )
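The docstring above describes this as a post-save Celery task. A minimal usage sketch follows (not from the source): the schema namespace and the 350-byte threshold are placeholder values, and the function is called synchronously rather than queued.

# Hypothetical usage sketch -- HSM_SCHEMA_NAMESPACE and the threshold are
# placeholders; DataFile and create_df_status are the names used above.
HSM_SCHEMA_NAMESPACE = "http://example.org/schemas/hsm/datafile/1"

for df in DataFile.objects.filter(verified=True):
    # In production this would typically be queued as a Celery task after
    # the datafile is saved and verified.
    create_df_status(df, HSM_SCHEMA_NAMESPACE, min_file_size=350)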
    def saveFlexstationMetadata(self, instance, schema, metadata):
        """Saves or overwrites the datafile's metadata to a Dataset_Files parameter set in the database.
        """
        logger.info('Saving Metadata')

        parameters = self.getParameters(schema, metadata)
        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    dfp.string_value = metadata[p.name].decode('cp1252')
                    dfp.save()

        return ps
Example #10
    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model (including
        non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)
        self.assertRaises(
            SuspiciousOperation,
            lambda: self.freetext_link_param.set_value("FREETEXT_ID_123"))
    def test_link_parameter_type_extra(self):
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(psm.get_param("dataset_link").link_id ==
                        self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id ==
                        self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)
Example #12
    def test_link_parameter_type_extra(self):
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)
Example #13
def get_parse_status(squash_datafile, ns):
    """Return the 'parse_status' DatafileParameter for the squash datafile's
    parameter set in namespace ns, creating it with the value 'new' if it
    does not exist yet.
    """
    from tardis.tardis_portal.models import DatafileParameter
    try:
        status = squash_datafile.datafileparameterset_set.get(
            schema__namespace=ns
        ).datafileparameter_set.get(
            name__name='parse_status')
    except DatafileParameter.DoesNotExist:
        from tardis.tardis_portal.models import (Schema, ParameterName)
        schema = Schema.objects.get(type=Schema.DATAFILE,
                                    namespace=ns)
        pn, created = ParameterName.objects.get_or_create(
            name='parse_status',
            schema=schema,
            data_type=ParameterName.STRING)
        ps = squash_datafile.datafileparameterset_set.get(schema=schema)
        status = DatafileParameter(parameterset=ps,
                                   name=pn,
                                   string_value='new')
        status.save()
    return status
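A brief hypothetical usage of get_parse_status follows; squash_df and SQUASHFS_NAMESPACE are placeholders standing in for a squashfile DataFile and the schema namespace registered by the calling filter.

# Hypothetical usage -- squash_df and SQUASHFS_NAMESPACE are placeholders.
status = get_parse_status(squash_df, SQUASHFS_NAMESPACE)
if status.string_value == 'new':
    # ... parse the squashfile contents here ...
    status.string_value = 'complete'
    status.save()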
    def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
        """update_df_status should skip files that are unverified"""
        df2 = DataFile(dataset=self.dataset,
                       filename="test_df2.jpg")
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test_df2.jpg")
        dfo2.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps2 = DatafileParameterSet(schema=schema, datafile=df2)
        ps2.save()

        param_name = ParameterName.objects.get(schema=schema, name="online")
        param2 = DatafileParameter(parameterset=ps2, name=param_name)
        param2.string_value = True
        param2.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())
        update_df_status()
        df_online.assert_not_called()
Example #15
    def savep(paramk, paramv):
        param_name = ParameterName.objects.get(schema__id=schema.id,
                                               name=paramk)
        dfp = DatafileParameter(parameterset=param_set, name=param_name)
        if paramv != "":
            if param_name.isNumeric():
                dfp.numerical_value = paramv
            else:
                dfp.string_value = paramv
            dfp.save()
    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model (including
        non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
                schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
                parameterset=self.datafileparameterset3,
                name=self.parametername_unresolvable_link)
        self.assertRaises(SuspiciousOperation,
                          lambda: self.freetext_link_param.set_value(
                              "FREETEXT_ID_123"))
Example #17
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run showinf on FCS file to extract metadata.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'fcs-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock(): return cache.add(lock_id, 'true', LOCK_EXPIRE)
    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock(): cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(schema=schema,
                                                     datafile=datafile).first()
            if ps:
                file_param = ParameterName.objects.get(schema__id=schema_id,
                                                       name='file')
                if DatafileParameter.objects.filter(parameterset=ps,
                                                    name=file_param).exists():
                    logger.info("FCS metadata already exists for df_id %d"
                                % df_id)
                    return

            cmdline = "'%s' '%s' '%s'" % \
                (sys.executable, showinf_path, inputfilename)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            image_info_list = stdout.split('\n')

            metadata = {
                'file': "",
                'date': "",
                'parametersAndStainsTable': ""}

            readingParametersAndStainsTable = False

            for line in image_info_list:
                m = re.match("File: (.*)", line)
                if m:
                    metadata['file'] = m.group(1)
                m = re.match("Date: (.*)", line)
                if m:
                    metadata['date'] = m.group(1)
                if line.strip() == "<ParametersAndStains>":
                    readingParametersAndStainsTable = True
                elif line.strip() == "</ParametersAndStains>":
                    readingParametersAndStainsTable = False
                elif readingParametersAndStainsTable:
                    metadata['parametersAndStainsTable'] += line

            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

            param_name_strings = ['file', 'date',
                                  'parametersAndStainsTable']
            for param_name_str in param_name_strings:
                try:
                    param_name = ParameterName.objects.get(schema__id=schema_id,
                                                           name=param_name_str)
                except ParameterName.DoesNotExist:
                    logger.error("Didn't find parameter %s in schema id %s"
                                 % (param_name_str, schema_id))
                    continue
                dfp = DatafileParameter(parameterset=ps, name=param_name)
                dfp.string_value = metadata[param_name_str]
                dfp.save()
        except:
            logger.error(traceback.format_exc())
        finally:
            release_lock()
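The acquire_lock/release_lock pair above is a simple cache-based mutex that relies on cache.add being atomic. As a sketch only (the cache_lock name is ours, not from the source), the same idea can be wrapped in a context manager using the standard Django cache API:

from contextlib import contextmanager
from django.core.cache import caches

@contextmanager
def cache_lock(lock_id, expire=300):
    # cache.add only succeeds if the key is not already set, so it acts as
    # an atomic "acquire"; the key is deleted again on exit.
    cache = caches['celery-locks']
    acquired = cache.add(lock_id, 'true', expire)
    try:
        yield acquired
    finally:
        if acquired:
            cache.delete(lock_id)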
    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42", md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(
            namespace="http://localhost/psmtest/df/",
            name="Parameter Set Manager", type=3)
        self.schema.save()

        self.parametername1 = ParameterName(
            schema=self.schema, name="parameter1",
            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(
            schema=self.schema, name="parameter2",
            full_name="Parameter 2",
            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(
            schema=self.schema, name="parameter3",
            full_name="Parameter 3",
            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1, string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2, numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema, name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema, name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
                schema=self.schema, name="freetext_link",
                full_name="This parameter is a non-URL LINK",
                data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()
class ParameterSetManagerTestCase(TestCase):

    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42", md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(
            namespace="http://localhost/psmtest/df/",
            name="Parameter Set Manager", type=3)
        self.schema.save()

        self.parametername1 = ParameterName(
            schema=self.schema, name="parameter1",
            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(
            schema=self.schema, name="parameter2",
            full_name="Parameter 2",
            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(
            schema=self.schema, name="parameter3",
            full_name="Parameter 3",
            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1, string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2, numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema, name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema, name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
                schema=self.schema, name="freetext_link",
                full_name="This parameter is a non-URL LINK",
                data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()

    def tearDown(self):
        self.exp.delete()
        self.user.delete()
        self.parametername1.delete()
        self.parametername2.delete()
        self.parametername3.delete()
        self.parametername_exp_link.delete()
        self.parametername_dataset_link.delete()
        self.parametername_unresolvable_link.delete()
        self.schema.delete()

    def test_existing_parameterset(self):

        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        self.assertTrue(psm.get_schema().namespace ==
                        "http://localhost/psmtest/df/")

        self.assertTrue(psm.get_param("parameter1").string_value == "test1")

        self.assertTrue(psm.get_param("parameter2", True) == 2)

    def test_new_parameterset(self):

        psm = ParameterSetManager(parentObject=self.datafile,
                                  schema="http://localhost/psmtest/df2/")

        self.assertTrue(psm.get_schema().namespace ==
                        "http://localhost/psmtest/df2/")

        psm.set_param("newparam1", "test3", "New Parameter 1")

        self.assertTrue(psm.get_param("newparam1").string_value ==
                        "test3")

        self.assertTrue(psm.get_param("newparam1").name.full_name ==
                        "New Parameter 1")

        psm.new_param("newparam1", "test4")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 2)

        psm.set_param_list("newparam2", ("a", "b", "c", "d"))

        self.assertTrue(len(psm.get_params("newparam2")) == 4)

        psm.set_params_from_dict(
            {"newparam2": "test5", "newparam3": 3})

        self.assertTrue(psm.get_param("newparam2", True) == "test5")

        # the newparam3 gets created and '3' is set to a string_value
        # since one cannot assume that an initial numeric value
        # will imply continuing numeric type for this new param
        self.assertTrue(psm.get_param("newparam3").string_value == '3')

        psm.delete_params("newparam1")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 0)

    def test_link_parameter_type(self):
        """
        Test that Parameter.link_gfk (GenericForeignKey) is correctly
        assigned after using Parameter.set_value(some_url) for a LINK Parameter.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        # Check link to experiment
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.assertTrue(psm.get_param("exp_link").string_value ==
                        exp_url)

        self.assertTrue(psm.get_param("exp_link").link_id ==
                        self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

        # Check link to dataset
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.assertTrue(psm.get_param("dataset_link").string_value ==
                        dataset_url)

        self.assertTrue(psm.get_param("dataset_link").link_id ==
                        self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

    def test_link_parameter_type_extra(self):
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(psm.get_param("dataset_link").link_id ==
                        self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id ==
                        self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model (including
        non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
                schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
                parameterset=self.datafileparameterset3,
                name=self.parametername_unresolvable_link)
        self.assertRaises(SuspiciousOperation,
                          lambda: self.freetext_link_param.set_value(
                              "FREETEXT_ID_123"))

    def test_tz_naive_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3", str(datetime(1970, 01, 01, 10, 0, 0)))

        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 01, 01, 0, 0, 0, tzinfo=pytz.utc))

    def test_tz_aware_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3",
                      '1970-01-01T08:00:00+08:00')

        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 01, 01, 0, 0, 0, tzinfo=pytz.utc))
Example #20
class ParameterSetManagerTestCase(TestCase):
    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42",
                                 md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(namespace="http://localhost/psmtest/df/",
                             name="Parameter Set Manager",
                             type=3)
        self.schema.save()

        self.parametername1 = ParameterName(schema=self.schema,
                                            name="parameter1",
                                            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(schema=self.schema,
                                            name="parameter2",
                                            full_name="Parameter 2",
                                            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(schema=self.schema,
                                            name="parameter3",
                                            full_name="Parameter 3",
                                            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1,
            string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2,
            numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema,
            name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()

    def tearDown(self):
        self.exp.delete()
        self.user.delete()
        self.parametername1.delete()
        self.parametername2.delete()
        self.parametername3.delete()
        self.parametername_exp_link.delete()
        self.parametername_dataset_link.delete()
        self.parametername_unresolvable_link.delete()
        self.schema.delete()

    def test_existing_parameterset(self):

        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        self.assertTrue(
            psm.get_schema().namespace == "http://localhost/psmtest/df/")

        self.assertTrue(psm.get_param("parameter1").string_value == "test1")

        self.assertTrue(psm.get_param("parameter2", True) == 2)

    def test_new_parameterset(self):

        psm = ParameterSetManager(parentObject=self.datafile,
                                  schema="http://localhost/psmtest/df2/")

        self.assertTrue(
            psm.get_schema().namespace == "http://localhost/psmtest/df2/")

        psm.set_param("newparam1", "test3", "New Parameter 1")

        self.assertTrue(psm.get_param("newparam1").string_value == "test3")

        self.assertTrue(
            psm.get_param("newparam1").name.full_name == "New Parameter 1")

        psm.new_param("newparam1", "test4")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 2)

        psm.set_param_list("newparam2", ("a", "b", "c", "d"))

        self.assertTrue(len(psm.get_params("newparam2")) == 4)

        psm.set_params_from_dict({"newparam2": "test5", "newparam3": 3})

        self.assertTrue(psm.get_param("newparam2", True) == "test5")

        # the newparam3 gets created and '3' is set to a string_value
        # since one cannot assume that an initial numeric value
        # will imply continuing numeric type for this new param
        self.assertTrue(psm.get_param("newparam3").string_value == '3')

        psm.delete_params("newparam1")

        self.assertTrue(len(psm.get_params("newparam1", True)) == 0)

    def test_link_parameter_type(self):
        """
        Test that Parameter.link_gfk (GenericForeignKey) is correctly
        assigned after using Parameter.set_value(some_url) for a LINK Parameter.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        # Check link to experiment
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.assertTrue(psm.get_param("exp_link").string_value == exp_url)

        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

        # Check link to dataset
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.assertTrue(
            psm.get_param("dataset_link").string_value == dataset_url)

        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

    def test_link_parameter_type_extra(self):
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertTrue(
            psm.get_param("dataset_link").link_id == self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertTrue(psm.get_param("dataset_link").link_ct == dataset_ct)

        self.assertTrue(psm.get_param("dataset_link").link_gfk == self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertTrue(psm.get_param("exp_link").link_id == self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertTrue(psm.get_param("exp_link").link_ct == exp_ct)

        self.assertTrue(psm.get_param("exp_link").link_gfk == self.exp)

    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model (including
        non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset3)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)
        self.assertRaises(
            SuspiciousOperation,
            lambda: self.freetext_link_param.set_value("FREETEXT_ID_123"))

    def test_tz_naive_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3", str(datetime(1970, 01, 01, 10, 0, 0)))

        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 01, 01, 0, 0, 0, tzinfo=pytz.utc))

    def test_tz_aware_date_handling(self):
        """
        Ensure that dates are handled in a timezone-aware way.
        """
        psm = ParameterSetManager(parameterset=self.datafileparameterset)

        psm.new_param("parameter3", '1970-01-01T08:00:00+08:00')

        expect(psm.get_param("parameter3", True))\
            .to_equal(datetime(1970, 01, 01, 0, 0, 0, tzinfo=pytz.utc))
    def saveMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files parameter set.
        """
        logger.error('Olympus-saveMetadata()')
        parameters = self.getParameters(schema, metadata)

        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            print p.name
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    print p.name
                    if isinstance(metadata[p.name], list):
                        for val in reversed(metadata[p.name]):
                            strip_val = val.strip()
                            if strip_val:
                                if strip_val not in exclude_line:
                                    dfp = DatafileParameter(parameterset=ps,
                                                            name=p)
                                    dfp.string_value = strip_val
                                    dfp.save()
                    else:
                        dfp.string_value = metadata[p.name]
                        dfp.save()

        return ps
Example #22
    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42",
                                 md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(namespace="http://localhost/psmtest/df/",
                             name="Parameter Set Manager",
                             type=3)
        self.schema.save()

        self.parametername1 = ParameterName(schema=self.schema,
                                            name="parameter1",
                                            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(schema=self.schema,
                                            name="parameter2",
                                            full_name="Parameter 2",
                                            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(schema=self.schema,
                                            name="parameter3",
                                            full_name="Parameter 3",
                                            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1,
            string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2,
            numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema,
            name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()
    def saveMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files paramamter set.
        """
        parameters = self.getParameters(schema, metadata)

        exclude_line = dict()
        exclude_line['-----'] = None
        exclude_line['Reading global metadata'] = None
        exclude_line['Reading metadata'] = None
        exclude_line['Reading core metadata'] = None
        exclude_line['Populating metadata'] = None
        exclude_line['Reading tags'] = None
        exclude_line['Verifying Gatan format'] = None
        exclude_line['Initializing reader'] = None
        exclude_line['Checking file format [Gatan Digital Micrograph]'] = None

        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            print p.name
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    print p.name
                    if isinstance(metadata[p.name], list):
                        for val in reversed(metadata[p.name]):
                            strip_val = val.strip()
                            if strip_val:
                                if strip_val not in exclude_line:
                                    dfp = DatafileParameter(parameterset=ps,
                                                            name=p)
                                    dfp.string_value = strip_val
                                    dfp.save()
                    else:
                        dfp.string_value = metadata[p.name]
                        dfp.save()

        return ps
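
# A minimal standalone sketch (not part of the original filter) showing the
# same exclude-line filtering in isolation.  EXCLUDE_LINES, the helper name
# and the sample input are illustrative assumptions, not names from the
# original code.
EXCLUDE_LINES = frozenset([
    '-----',
    'Reading global metadata',
    'Reading metadata',
    'Reading core metadata',
    'Populating metadata',
    'Reading tags',
    'Verifying Gatan format',
    'Initializing reader',
    'Checking file format [Gatan Digital Micrograph]',
])


def filter_metadata_lines(lines):
    """Strip whitespace and drop empty or boilerplate lines."""
    stripped = (line.strip() for line in lines)
    return [line for line in stripped if line and line not in EXCLUDE_LINES]


# e.g. filter_metadata_lines(['Reading tags', '  PixelSize = 0.5  ', ''])
# returns ['PixelSize = 0.5']
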
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats showinf to extract metadata.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'bioformats-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock(): return cache.add(lock_id, 'true', LOCK_EXPIRE)
    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock(): cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(schema=schema,
                                                     datafile=datafile).first()
            if ps:
                info_param = ParameterName.objects.get(schema__id=schema_id,
                                                       name='image_information')
                if DatafileParameter.objects.filter(parameterset=ps,
                                                    name=info_param).exists():
                    logger.info("Metadata already exists for df_id %d"
                                % df_id)
                    return

            cmdline = "'%s' '%s' -nopix" % (showinf_path, inputfilename)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            # Skip the first 11 lines of showinf output (the preamble before
            # the key/value metadata dump)
            image_info_list = stdout.split('\n')[11:]

            # Some (possibly all) of the excluded lines below are specific to
            # the DM3 (Gatan Digital Micrograph) format:
            exclude_line = {
                '-----',
                'Reading global metadata',
                'Reading metadata',
                'Reading core metadata',
                'Populating metadata',
                'Reading tags',
                'Verifying Gatan format',
                'Initializing reader',
                'Checking file format [Gatan Digital Micrograph]',
            }

            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema, datafile=datafile)
                ps.save()

            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='image_information')
            for val in reversed(image_info_list):
                strip_val = val.strip()
                if strip_val:
                    if strip_val not in exclude_line:
                        dfp = DatafileParameter(parameterset=ps,
                                                name=param_name)
                        dfp.string_value = strip_val
                        dfp.save()
        finally:
            release_lock()
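
# A minimal sketch (illustrative, not part of the original filters) of the
# cache-based locking pattern used above, pulled out into a reusable context
# manager.  It assumes the same Django 'celery-locks' cache alias and a
# LOCK_EXPIRE timeout defined elsewhere in the module.
from contextlib import contextmanager

from django.core.cache import caches


@contextmanager
def datafile_lock(lock_id, expire):
    """Yield True if the lock was acquired, False otherwise."""
    cache = caches['celery-locks']
    # cache.add is atomic: it fails if the key already exists
    acquired = cache.add(lock_id, 'true', expire)
    try:
        yield acquired
    finally:
        if acquired:
            cache.delete(lock_id)


# Usage sketch:
#     with datafile_lock('bioformats-filter-showinf-lock-%d' % df_id,
#                        LOCK_EXPIRE) as locked:
#         if locked:
#             ...  # run showinf and save the metadata
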
Example #25
0
def run_fcsplot(fcsplot_path, inputfilename, df_id, schema_id):
    """
    Run fcsplot on an FCS file.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # fcsplot operates on each datafile at a time.
    lock_id = 'fcs-filter-fcsplot-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock(): return cache.add(lock_id, 'true', LOCK_EXPIRE)
    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock(): cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(schema=schema,
                                                     datafile=datafile).first()
            if ps:
                prev_param = ParameterName.objects.get(schema__id=schema_id,
                                                       name='previewImage')
                if DatafileParameter.objects.filter(parameterset=ps,
                                                    name=prev_param).exists():
                    logger.info("FCS preview already exists for df_id %d"
                                % df_id)
                    return

            outputextension = "png"
            dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                                verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH, preview_image_rel_file_path)

            if not os.path.exists(os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))

            cmdline = "'%s' '%s' '%s' '%s'" % \
                (sys.executable, fcsplot_path, inputfilename,
                 preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema,
                                          datafile=datafile)
                ps.save()
            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        except Exception:
            logger.error(traceback.format_exc())
        finally:
            release_lock()
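
# An illustrative sketch (not the original code) of the preview-image path
# logic shared by run_fcsplot above and run_bfconvert below: the relative path
# is derived from the preferred DataFileObject's URI, the datafile id and the
# input filename.  The helper name and example values are assumptions.
import os
import urlparse  # Python 2; use urllib.parse on Python 3


def preview_image_paths(dfo_uri, df_id, inputfilename, store_path,
                        extension='png'):
    rel_path = os.path.join(
        os.path.dirname(urlparse.urlparse(dfo_uri).path),
        str(df_id),
        '%s.%s' % (os.path.basename(inputfilename), extension))
    return rel_path, os.path.join(store_path, rel_path)


# e.g. preview_image_paths('stream/sample.fcs', 42, '/tmp/sample.fcs',
#                          '/var/metadata')
# returns ('stream/42/sample.fcs.png', '/var/metadata/stream/42/sample.fcs.png')
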
def run_bfconvert(bfconvert_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats bfconvert on an image file.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # bfconvert operates on each datafile at a time.
    lock_id = 'bioformats-filter-bfconvert-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock(): return cache.add(lock_id, 'true', LOCK_EXPIRE)
    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock(): cache.delete(lock_id)

    if acquire_lock():
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(schema=schema,
                                                     datafile=datafile).first()
            if ps:
                prev_param = ParameterName.objects.get(schema__id=schema_id,
                                                       name='previewImage')
                if DatafileParameter.objects.filter(parameterset=ps,
                                                    name=prev_param).exists():
                    logger.info("Preview image already exists for df_id %d"
                                % df_id)
                    return

            outputextension = "png"
            dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                                verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH, preview_image_rel_file_path)

            if not os.path.exists(
                    os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))

            # Extract only the first image from the stack:
            cmdline = "'%s' -series 0 -timepoint 0 -channel 0 -z 0 " \
                "'%s' '%s' -overwrite" %\
                (bfconvert_path, inputfilename, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            os.rename(preview_image_file_path,
                      preview_image_file_path + '.bioformats')

            # Run ImageMagick convert with contrast-stretch on an image file.
            # We could probably do this with the Wand Python module instead.
            cmdline = "convert '%s.bioformats' -contrast-stretch 0 '%s'" %\
                (preview_image_file_path, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            os.unlink(preview_image_file_path + '.bioformats')
            if p.returncode != 0:
                logger.error(stdout)
                return
            try:
                ps = DatafileParameterSet.objects.get(schema__id=schema_id,
                                                      datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema,
                                          datafile=datafile)
                ps.save()
            param_name = ParameterName.objects.get(schema__id=schema_id,
                                                   name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        finally:
            release_lock()
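
# An alternative sketch (not the original code) of the same bfconvert call
# using an argument list instead of shell=True, which sidesteps shell quoting
# of unusual filenames.  The bfconvert flags are the ones used above; the
# helper name is an assumption for illustration.
import subprocess


def run_bfconvert_argv(bfconvert_path, inputfilename, outputfilename):
    argv = [bfconvert_path, '-series', '0', '-timepoint', '0',
            '-channel', '0', '-z', '0',
            inputfilename, outputfilename, '-overwrite']
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    stdout, _ = proc.communicate()
    return proc.returncode, stdout
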
Example #27
0
    def test_parameter(self):
        exp = Experiment(
            title='test exp1',
            institution_name='Australian Synchrotron',
            approved=True,
            created_by=self.user,
            public_access=Experiment.PUBLIC_ACCESS_NONE,
        )
        exp.save()

        dataset = Dataset(description="dataset description")
        dataset.save()
        dataset.experiments.add(exp)
        dataset.save()

        df_file = DataFile(dataset=dataset,
                           filename='file.txt',
                           size=42,
                           md5sum='bogus')
        df_file.save()

        df_schema = Schema(namespace='http://www.cern.ch/felzmann/schema1.xml',
                           type=Schema.DATAFILE)
        df_schema.save()

        ds_schema = Schema(namespace='http://www.cern.ch/felzmann/schema2.xml',
                           type=Schema.DATASET)
        ds_schema.save()

        exp_schema = Schema(
            namespace='http://www.cern.ch/felzmann/schema3.xml',
            type=Schema.EXPERIMENT)
        exp_schema.save()

        df_parname = ParameterName(schema=df_schema,
                                   name='name',
                                   full_name='full_name',
                                   units='image/jpg',
                                   data_type=ParameterName.FILENAME)
        df_parname.save()

        ds_parname = ParameterName(schema=ds_schema,
                                   name='name',
                                   full_name='full_name',
                                   units='image/jpg',
                                   data_type=ParameterName.FILENAME)
        ds_parname.save()

        exp_parname = ParameterName(schema=exp_schema,
                                    name='name',
                                    full_name='full_name',
                                    units='image/jpg',
                                    data_type=ParameterName.FILENAME)
        exp_parname.save()

        df_parset = DatafileParameterSet(schema=df_schema, datafile=df_file)
        df_parset.save()

        ds_parset = DatasetParameterSet(schema=ds_schema, dataset=dataset)
        ds_parset.save()

        exp_parset = ExperimentParameterSet(schema=exp_schema, experiment=exp)
        exp_parset.save()

        with self.settings(METADATA_STORE_PATH=os.path.dirname(__file__)):
            filename = 'test.jpg'
            df_parameter = DatafileParameter(name=df_parname,
                                             parameterset=df_parset,
                                             string_value=filename)
            df_parameter.save()

            ds_parameter = DatasetParameter(name=ds_parname,
                                            parameterset=ds_parset,
                                            string_value=filename)
            ds_parameter.save()

            exp_parameter = ExperimentParameter(name=exp_parname,
                                                parameterset=exp_parset,
                                                string_value=filename)
            exp_parameter.save()

            self.assertEqual(
                "<a href='/display/DatafileImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatafileImage/load/%i/' /></a>"
                %  # noqa
                (df_parameter.id, df_parameter.id),
                df_parameter.get())

            self.assertEqual(
                "<a href='/display/DatasetImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatasetImage/load/%i/' /></a>"
                %  # noqa
                (ds_parameter.id, ds_parameter.id),
                ds_parameter.get())

            self.assertEqual(
                "<a href='/display/ExperimentImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/ExperimentImage/load/%i/' /></a>"
                %  # noqa
                (exp_parameter.id, exp_parameter.id),
                exp_parameter.get())
Example #28
0
    def saveMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Datafile parameter set.
        """
        parameters = self.getParameters(schema, metadata)

        if not parameters:
            print "Bailing out of saveMetadata because of 'not parameters'."
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  datafile=instance)
            print "Parameter set already exists for %s, " \
                "so we'll just return it." % instance.filename
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      datafile=instance)
            ps.save()

        # NOTE: exclude_line is assumed to be defined at module level (the
        # same set of boilerplate lines filtered out by the other saveMetadata
        # variant above); as shown, the list branch below would otherwise
        # raise a NameError.
        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    if isinstance(metadata[p.name], list):
                        for val in reversed(metadata[p.name]):
                            strip_val = val.strip()
                            if strip_val and strip_val not in exclude_line:
                                dfp = DatafileParameter(parameterset=ps,
                                                        name=p)
                                dfp.string_value = strip_val
                                dfp.save()
                    else:
                        dfp.string_value = metadata[p.name]
                        dfp.save()

        return ps
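
# A compact alternative sketch (illustrative only, not the original filter
# code) of the fetch-or-create step above using Django's get_or_create, plus a
# small helper that saves one value with the same numeric/string split.  Model
# classes are assumed to be imported as in the surrounding snippets.
def get_or_create_parameterset(schema, datafile):
    ps, created = DatafileParameterSet.objects.get_or_create(
        schema=schema, datafile=datafile)
    return ps, created


def save_single_value(ps, param_name, value):
    dfp = DatafileParameter(parameterset=ps, name=param_name)
    if param_name.isNumeric():
        dfp.numerical_value = value
    else:
        dfp.string_value = value
    dfp.save()
    return dfp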