def saveFlexstationMetadata(self, instance, schema, metadata):
    """Persist extracted Flexstation metadata as a DatafileParameterSet.

    If a parameter set for this schema/datafile pair already exists it is
    returned untouched; otherwise a new one is created and populated from
    *metadata*. Returns None when no matching parameters are found.
    """
    logger.info('Saving Metadata')

    matched = self.getParameters(schema, metadata)
    if not matched:
        return None

    # Return the existing set untouched rather than re-saving values.
    if DatafileParameterSet.objects.filter(schema=schema,
                                           dataset_file=instance).exists():
        return DatafileParameterSet.objects.get(schema=schema,
                                                dataset_file=instance)

    ps = DatafileParameterSet(schema=schema, dataset_file=instance)
    ps.save()

    for param in matched:
        if param.name not in metadata:
            continue
        value = metadata[param.name]
        entry = DatafileParameter(parameterset=ps, name=param)
        if not param.isNumeric():
            # text values arrive as cp1252-encoded bytes
            entry.string_value = value.decode('cp1252')
            entry.save()
        elif value != '':
            # empty strings cannot be stored in a numeric field
            entry.numerical_value = value
            entry.save()

    return ps
    def test_000_update_df_status_offline(self, mock_stat):
        """update_df_status should check the online status of
        preferred DFOs for all previously online datafiles and
        update online Parameter to 'False' for any offline files."""
        df1 = DataFile(dataset=self.dataset,
                       filename="test_df.jpg")
        df1.save()
        dfo1 = DataFileObject(datafile=df1,
                              storage_box=self.sbox1,
                              uri="stream/test.jpg",
                              verified=True)
        dfo1.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps = DatafileParameterSet(schema=schema, datafile=df1)
        ps.save()

        # Mark the file as online.  The status is stored as the string
        # form of a bool, matching what create_df_status writes via str().
        param_name = ParameterName.objects.get(schema=schema, name="online")
        param = DatafileParameter(parameterset=ps, name=param_name)
        param.string_value = "True"
        param.save()

        # st_blocks=0 means no blocks on disk, i.e. the file is offline
        # (migrated to tape by the HSM).
        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=0,
                                       st_mtime=datetime.now())
        update_df_status()

        params = DatafileParameter.objects.filter(
            parameterset__schema=schema,
            parameterset__datafile=df1)

        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(params.count(), 1)
        self.assertEqual(params[0].string_value, "False")
Esempio n. 3
0
def register_squashfile(exp_id, epn, sq_dir, sq_filename, namespace):
    '''Register a SquashFS archive as a DataFile in its own dataset.

    If exactly one DataFile of this name already exists under the
    experiment it is returned as-is.  Otherwise a new dataset, datafile,
    parameter set (recording the EPN) and storage-box object are created.

    example:
    register_squashfile(456, '1234A', '/srv/squashstore', '1234A.squashfs',
        'http://synchrotron.org.au/mx/squashfsarchive/1')

    Returns the DataFile, or None when no ``.md5sum`` sidecar is readable.
    '''
    dfs = DataFile.objects.filter(filename=sq_filename,
                                  dataset__experiments__id=exp_id)
    if len(dfs) == 1:
        return dfs[0]
    e = Experiment.objects.get(id=exp_id)
    ds = Dataset(description="01 SquashFS Archive")
    ds.save()
    ds.experiments.add(e)
    filepath = os.path.join(sq_dir, sq_filename)
    try:
        # close the sidecar file promptly instead of leaking the handle,
        # and catch only I/O errors instead of a bare except
        with open(filepath + '.md5sum', 'r') as md5file:
            md5sum = md5file.read().strip()[:32]
    except IOError:
        print('no md5sum file found')
        return None
    size = os.path.getsize(filepath)
    df = DataFile(md5sum=md5sum,
                  filename=sq_filename,
                  size=str(size),
                  dataset=ds)
    df.save()
    schema = Schema.objects.filter(namespace=namespace)[0]
    ps = DatafileParameterSet(schema=schema, datafile=df)
    ps.save()
    ps.set_param('EPN', epn)
    sbox = StorageBox.objects.get(name='squashstore')
    dfo = DataFileObject(storage_box=sbox, datafile=df, uri=sq_filename)
    dfo.save()
    return df
    def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
        """update_df_status should skip any files that have previously
        been marked as offline."""
        df2 = DataFile(dataset=self.dataset,
                       filename="test_df2.jpg")
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test_df2.jpg",
                              verified=True)
        dfo2.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps2 = DatafileParameterSet(schema=schema, datafile=df2)
        ps2.save()

        # Already offline: stored as the string form of a bool, matching
        # what create_df_status writes via str().
        param_name = ParameterName.objects.get(schema=schema, name="online")
        param2 = DatafileParameter(parameterset=ps2, name=param_name)
        param2.string_value = "False"
        param2.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())
        update_df_status()

        # the online check must not run for files already known offline
        # (assertEqual: assertEquals is a deprecated alias)
        self.assertEqual(mock_df_online.call_count, 0)
Esempio n. 5
0
    def saveExifMetadata(self, instance, schema, metadata):
        """Save all the EXIF metadata to a Dataset_Files parameter set.

        Returns the existing DatafileParameterSet untouched if one already
        exists for this schema/datafile, the newly populated set otherwise,
        or None when no parameters match.
        """
        # NOTE(review): 'getParamaters' looks like a typo of the
        # 'getParameters' helper used by the sibling save*Metadata methods
        # — confirm the helper's actual name before renaming.
        parameters = self.getParamaters(schema, metadata)
        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    # skip empty strings: they cannot be stored in a numeric
                    # field (consistent with the other save*Metadata methods)
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    dfp.string_value = metadata[p.name]
                    dfp.save()
        return ps
    def saveMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files parameter set.

        List-valued metadata entries are split into one string parameter
        per non-empty line; Bio-Formats/Gatan reader status lines are
        filtered out before being stored.
        """
        parameters = self.getParameters(schema, metadata)

        # reader status lines that must not be stored as metadata values
        # (a set, not a dict of Nones, is the idiomatic membership test)
        exclude_line = {
            '-----',
            'Reading global metadata',
            'Reading metadata',
            'Reading core metadata',
            'Populating metadata',
            'Reading tags',
            'Verifying Gatan format',
            'Initializing reader',
            'Checking file format [Gatan Digital Micrograph]',
        }

        if not parameters:
            return None

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            print(p.name)  # debug trace (parenthesised: Python 2/3 safe)
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    # numeric fields cannot store an empty string
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    if isinstance(metadata[p.name], list):
                        # one parameter per non-empty, non-status line;
                        # reversed() preserves the historic storage order
                        for val in reversed(metadata[p.name]):
                            strip_val = val.strip()
                            if strip_val and strip_val not in exclude_line:
                                dfp = DatafileParameter(parameterset=ps,
                                                        name=p)
                                dfp.string_value = strip_val
                                dfp.save()
                    else:
                        dfp.string_value = metadata[p.name]
                        dfp.save()

        return ps
Esempio n. 7
0
def create_df_status(datafile, schema_name, min_file_size):
    """Post-save celery task that checks online status of new file and
    creates HSM metadata to track online status.

    Parameters
    ----------
    datafile: DataFile
        datafile to check and create online/offline status
        metadata for
    schema_name: str
        namespace of the Schema which describes the ParameterNames
    min_file_size : int
        minimum size of files to check HSM status of. This
        param is simply passed on to df_online.

    Returns
    -------
    None
    """
    if not datafile.verified:
        # Guard clause: unverified files have no authoritative data to stat.
        # BUG FIX: the original message literal was broken (stray quotes and
        # embedded newline inside the triple-quoted string).
        LOGGER.warning(
            "Cannot determine online/offline status for datafile %s "
            "as it is not verified",
            datafile.id
        )
        return

    with DatafileLock(datafile, "datafile-%s" % datafile.id) as lock:
        if lock:
            schema = Schema.objects.get(namespace=schema_name)
            if DatafileParameterSet.objects.filter(
                    schema=schema, datafile=datafile).exists():
                LOGGER.debug(
                    "HSM DatafileParameterSet already exists for: %s",
                    datafile.id
                )
                return

            ps = DatafileParameterSet(schema=schema, datafile=datafile)
            ps.save()

            param_name = ParameterName.objects.get(
                schema=schema,
                name="online"
            )

            # record the current status as the string "True"/"False"
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = str(df_online(datafile, min_file_size))
            dfp.save()
Esempio n. 8
0
    def test_unresolvable_link_parameter(self):
        """
        Test that LINK Parameters that can't be resolved to a model (including
        non-URL values) still work.
        """
        self.datafileparameterset3 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset3.save()

        # (removed an unused ParameterSetManager local that was never read)

        # Create a Parameter of type LINK to an unresolvable (non-URL)
        # free-text value; setting it must raise SuspiciousOperation
        self.freetext_link_param = DatafileParameter(
            parameterset=self.datafileparameterset3,
            name=self.parametername_unresolvable_link)
        self.assertRaises(
            SuspiciousOperation,
            lambda: self.freetext_link_param.set_value("FREETEXT_ID_123"))
    def saveMetadata(self, instance, schema, metadata):
        """Save all the metadata to a Dataset_Files parameter set.

        Olympus variant: list-valued metadata entries are split into one
        string parameter per non-empty line, skipping reader status lines.
        """
        # BUG FIX: the original body mixed tabs and spaces for indentation,
        # which is a syntax error under Python 3; normalised to spaces.
        logger.error('Olympus-saveMetadata()')
        parameters = self.getParameters(schema, metadata)

        if not parameters:
            return None

        # BUG FIX: exclude_line was referenced below but never defined in
        # this method (NameError at runtime); definition copied from the
        # sibling saveMetadata implementation.
        exclude_line = {
            '-----',
            'Reading global metadata',
            'Reading metadata',
            'Reading core metadata',
            'Populating metadata',
            'Reading tags',
            'Verifying Gatan format',
            'Initializing reader',
            'Checking file format [Gatan Digital Micrograph]',
        }

        try:
            ps = DatafileParameterSet.objects.get(schema=schema,
                                                  dataset_file=instance)
            return ps  # if already exists then just return it
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema,
                                      dataset_file=instance)
            ps.save()

        for p in parameters:
            print(p.name)  # debug trace (parenthesised: Python 2/3 safe)
            if p.name in metadata:
                dfp = DatafileParameter(parameterset=ps,
                                        name=p)
                if p.isNumeric():
                    # numeric fields cannot store an empty string
                    if metadata[p.name] != '':
                        dfp.numerical_value = metadata[p.name]
                        dfp.save()
                else:
                    if isinstance(metadata[p.name], list):
                        # one parameter per non-empty, non-status line
                        for val in reversed(metadata[p.name]):
                            strip_val = val.strip()
                            if strip_val and strip_val not in exclude_line:
                                dfp = DatafileParameter(parameterset=ps,
                                                        name=p)
                                dfp.string_value = strip_val
                                dfp.save()
                    else:
                        dfp.string_value = metadata[p.name]
                        dfp.save()

        return ps
Esempio n. 10
0
    def setUp(self):
        """Build the shared fixture: a user owning an experiment (via an
        owner ACL), plus a dataset, datafile, hidden schema and an empty
        datafile parameter set."""
        self.user = User.objects.create_user('******', '', 'secret')
        self.userProfile = self.user.userprofile

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.acl = ObjectACL(pluginId=django_user,
                             entityId=str(self.user.id),
                             content_object=self.exp,
                             canRead=True,
                             isOwner=True,
                             aclOwnershipType=ObjectACL.OWNER_OWNED)
        self.acl.save()

        self.dataset = Dataset(description='dataset description...')
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset, size=42,
                                 filename="foo", md5sum="junk")
        self.datafile.save()

        # hidden schema: should not appear in default schema listings
        self.testschema = Schema(namespace="http://test.com/test/schema",
                                 name="Test View", type=Schema.DATAFILE,
                                 hidden=True)
        self.testschema.save()

        self.dfps = DatafileParameterSet(datafile=self.datafile,
                                         schema=self.testschema)
        self.dfps.save()
Esempio n. 11
0
    def test_link_parameter_type_extra(self):
        """Exercise LINK parameter URL variations (no trailing slash,
        API-style experiment URLs) on a second parameter set.

        Uses assertEqual instead of assertTrue(a == b) so failures report
        the differing values.
        """
        # make a second ParameterSet for testing some variations
        # in URL values
        self.datafileparameterset2 = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset2.save()

        psm = ParameterSetManager(parameterset=self.datafileparameterset2)

        self.dataset_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_dataset_link)
        # /dataset/1 - no trailing slash
        dataset_url = self.dataset.get_absolute_url()
        self.dataset_link_param2.set_value(dataset_url)
        self.dataset_link_param2.save()

        # Check link_id/link_ct/link_gfk to dataset
        self.assertEqual(
            psm.get_param("dataset_link").link_id, self.dataset.id)

        dataset_ct = ContentType.objects.get(model__iexact="dataset")
        self.assertEqual(psm.get_param("dataset_link").link_ct, dataset_ct)

        self.assertEqual(psm.get_param("dataset_link").link_gfk, self.dataset)

        # Test links of the form /api/v1/experiment/<experiment_id>/
        self.exp_link_param2 = DatafileParameter(
            parameterset=self.datafileparameterset2,
            name=self.parametername_exp_link)
        exp_url = '/api/v1/experiment/%s/' % self.exp.id
        self.exp_link_param2.set_value(exp_url)
        self.exp_link_param2.save()

        # Check link_id/link_ct/link_gfk to experiment
        self.assertEqual(psm.get_param("exp_link").link_id, self.exp.id)

        exp_ct = ContentType.objects.get(model__iexact="experiment")
        self.assertEqual(psm.get_param("exp_link").link_ct, exp_ct)

        self.assertEqual(psm.get_param("exp_link").link_gfk, self.exp)
    def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
        """update_df_status should skip files that are unverified"""
        df2 = DataFile(dataset=self.dataset,
                       filename="test_df2.jpg")
        df2.save()
        # deliberately NOT verified: no verified=True on the DFO
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test_df2.jpg")
        dfo2.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps2 = DatafileParameterSet(schema=schema, datafile=df2)
        ps2.save()

        # online status is stored as the string form of a bool, matching
        # what create_df_status writes via str()
        param_name = ParameterName.objects.get(schema=schema, name="online")
        param2 = DatafileParameter(parameterset=ps2, name=param_name)
        param2.string_value = "True"
        param2.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())
        update_df_status()
        # unverified file must never reach the online check
        df_online.assert_not_called()
Esempio n. 13
0
def process_meta(func, df, schema_name, overwrite=False, **kwargs):
    """Extract metadata from a Datafile using a provided function and save the
    outputs as DatafileParameters.

    Parameters
    ----------
    func: Function
        Function to extract metadata from a file. Function must have
        input_file_path as an argument e.g.:
        def meta_proc(input_file_path, **kwargs):
            ...
        It must return a dict containing ParameterNames as keys and the
        Parameters to be saved as values. Parameters (values) can be singular
        strings/numerics or a list of strings/numeric. If it's a list, each
        element will be saved as a new DatafileParameter.
    df: tardis.tardis_portal.models.Datafile
        Datafile instance to process.
    schema_name: str
        Names of schema which describes ParameterNames
    add: boolean (default: False)
        Specifies whether or not to add to an existing Parameterset for this
        Datafile rather that overwriting or exiting.
    overwrite: boolean (default: False)
        Specifies whether to overwrite any exisiting parametersets for
        this datafile.


    Returns
    -------
    None
    """
    if acquire_datafile_lock(df.id):
        # Need to start a JVM in each thread
        check_and_start_jvm()

        try:
            javabridge.attach()
            log4j.basic_config()
            schema = Schema.objects.get(namespace__exact=schema_name)
            if DatafileParameterSet.objects\
                    .filter(schema=schema, datafile=df).exists():
                if overwrite:
                    psets = DatafileParameterSet.objects.get(schema=schema,
                                                             datafile=df)
                    logger.warning("Overwriting parametersets for %s"
                                   % df.filename)
                    [delete_old_parameterset(ps) for ps in psets]
                else:
                    logger.warning("Parametersets for %s already exist."
                                   % df.filename)
                    return

            dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                                verified=True).first()
            input_file_path = dfo.get_full_path()

            logger.debug("Processing file: %s" % input_file_path)
            metadata_params = func(input_file_path, **kwargs)

            if not metadata_params:
                logger.debug("No metadata to save")
                return

            for sm in metadata_params:
                ps = DatafileParameterSet(schema=schema, datafile=df)
                ps.save()

                logger.debug("Saving parameters for: %s" % input_file_path)
                save_parameters(schema, ps, sm)
        except Exception, e:
            logger.debug(e)
        finally:
Esempio n. 14
0
    def __init__(self, parameterset=None, parentObject=None,
                 schema=None):
        """
        Wrap an existing ParameterSet, or create and save a new one.

        Either pass an existing ``parameterset`` (its schema, namespace and
        parameters are adopted), or pass both ``parentObject`` and
        ``schema`` to create a fresh parameter set attached to the parent.

        :param parameterset: existing Datafile/Dataset/Experiment
          parameter set to manage
        :param parentObject: Dataset_File, Dataset or Experiment that a
          newly created parameter set will be attached to
        :param schema: Schema namespace used (via ``self.get_schema()``)
          to look up the Schema for the new parameter set
        :type schema: string
        :raises TypeError: for an unsupported parameterset/parent type, or
          when neither form of arguments is supplied
        """

        if parameterset:
            # adopt the existing set: schema and namespace come from it,
            # parameters are loaded sorted by their display name
            self.parameterset = parameterset
            self.schema = self.parameterset.schema
            self.namespace = self.schema.namespace

            if isinstance(self.parameterset, DatafileParameterSet):
                self.parameters = DatafileParameter.objects.filter(\
                   parameterset=self.parameterset).order_by('name__full_name')

                # blank_param is the Parameter class used to create new
                # entries for this kind of parameter set
                self.blank_param = DatafileParameter

            elif isinstance(self.parameterset, DatasetParameterSet):
                self.parameters = DatasetParameter.objects.filter(\
                   parameterset=self.parameterset).order_by('name__full_name')

                self.blank_param = DatasetParameter

            elif isinstance(self.parameterset, ExperimentParameterSet):
                self.parameters = ExperimentParameter.objects.filter(\
                   parameterset=self.parameterset).order_by('name__full_name')

                self.blank_param = ExperimentParameter

            else:
                raise TypeError("Invalid parameterset object given.")

        elif parentObject and schema:
            # create a new, empty parameter set for the parent object

            self.namespace = schema

            if isinstance(parentObject, Dataset_File):
                self.parameterset = DatafileParameterSet(
                    schema=self.get_schema(), dataset_file=parentObject)

                self.parameterset.save()

                self.parameters = DatafileParameter.objects.filter(
                    parameterset=self.parameterset)

                self.blank_param = DatafileParameter

            elif isinstance(parentObject, Dataset):
                self.parameterset = DatasetParameterSet(
                    schema=self.get_schema(), dataset=parentObject)

                self.parameterset.save()

                self.parameters = DatasetParameter.objects.filter(
                    parameterset=self.parameterset)

                self.blank_param = DatasetParameter

            elif isinstance(parentObject, Experiment):
                self.parameterset = ExperimentParameterSet(
                    schema=self.get_schema(), experiment=parentObject)

                self.parameterset.save()

                self.parameters = ExperimentParameter.objects.filter(
                    parameterset=self.parameterset)

                self.blank_param = ExperimentParameter

            else:
                raise TypeError("Invalid parent object." +
                    "Must be an experiment/dataset/datafile not " + str(type(parentObject)))

        else:
            raise TypeError("Missing arguments")
Esempio n. 15
0
    def setUp(self):
        """Build the ParameterSetManager test fixture.

        Creates a user, experiment, dataset and datafile (with a DFO), a
        schema holding string/numeric/datetime ParameterNames, a datafile
        parameter set with one string and one numeric parameter, and three
        LINK-type ParameterNames (experiment, dataset, and an unresolvable
        free-text one) with values set for the first two.
        """
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        # scratch directory for tests that need a filesystem path
        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42",
                                 md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        # type=3 is the datafile schema type
        self.schema = Schema(namespace="http://localhost/psmtest/df/",
                             name="Parameter Set Manager",
                             type=3)
        self.schema.save()

        # parameter1: default (string) data type
        self.parametername1 = ParameterName(schema=self.schema,
                                            name="parameter1",
                                            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(schema=self.schema,
                                            name="parameter2",
                                            full_name="Parameter 2",
                                            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(schema=self.schema,
                                            name="parameter3",
                                            full_name="Parameter 3",
                                            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1,
            string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2,
            numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value; no Parameter is created here — the unresolvable
        # tests create their own
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema,
            name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()
0
    def test_parameter(self):
        """FILENAME parameters with image/jpg units render the same image
        link markup for datafile, dataset and experiment parameters."""
        exp = Experiment(
            title='test exp1',
            institution_name='Australian Synchrotron',
            approved=True,
            created_by=self.user,
            public_access=Experiment.PUBLIC_ACCESS_NONE,
        )
        exp.save()

        dataset = Dataset(description="dataset description")
        dataset.save()
        dataset.experiments.add(exp)
        dataset.save()

        df_file = DataFile(dataset=dataset, filename='file.txt',
                           size=42, md5sum='bogus')
        df_file.save()

        def make_schema(num, kind):
            # one schema per parameter level, numbered 1..3
            schema = Schema(
                namespace='http://www.cern.ch/felzmann/schema%d.xml' % num,
                type=kind)
            schema.save()
            return schema

        df_schema = make_schema(1, Schema.DATAFILE)
        ds_schema = make_schema(2, Schema.DATASET)
        exp_schema = make_schema(3, Schema.EXPERIMENT)

        def make_parname(schema):
            # FILENAME parameter with image units -> rendered as <img> link
            parname = ParameterName(schema=schema,
                                    name='name',
                                    full_name='full_name',
                                    units='image/jpg',
                                    data_type=ParameterName.FILENAME)
            parname.save()
            return parname

        df_parname = make_parname(df_schema)
        ds_parname = make_parname(ds_schema)
        exp_parname = make_parname(exp_schema)

        df_parset = DatafileParameterSet(schema=df_schema, datafile=df_file)
        df_parset.save()

        ds_parset = DatasetParameterSet(schema=ds_schema, dataset=dataset)
        ds_parset.save()

        exp_parset = ExperimentParameterSet(schema=exp_schema, experiment=exp)
        exp_parset.save()

        def expected_markup(view, pid):
            # the markup Parameter.get() produces for image filenames
            link = '/display/%s/load/%i/' % (view, pid)
            return ("<a href='%s' target='_blank'>"
                    "<img style='width: 300px;' src='%s' /></a>"
                    % (link, link))

        with self.settings(METADATA_STORE_PATH=os.path.dirname(__file__)):
            filename = 'test.jpg'
            df_parameter = DatafileParameter(name=df_parname,
                                             parameterset=df_parset,
                                             string_value=filename)
            df_parameter.save()

            ds_parameter = DatasetParameter(name=ds_parname,
                                            parameterset=ds_parset,
                                            string_value=filename)
            ds_parameter.save()

            exp_parameter = ExperimentParameter(name=exp_parname,
                                                parameterset=exp_parset,
                                                string_value=filename)
            exp_parameter.save()

            self.assertEqual(
                expected_markup('DatafileImage', df_parameter.id),
                df_parameter.get())

            self.assertEqual(
                expected_markup('DatasetImage', ds_parameter.id),
                ds_parameter.get())

            self.assertEqual(
                expected_markup('ExperimentImage', exp_parameter.id),
                exp_parameter.get())
Esempio n. 17
0
def process_meta_file_output(df_id, schema_name, overwrite=False, **kwargs):
    """Extract metadata from a Datafile using the get_meta function and save the
    outputs as DatafileParameters. This function differs from process_meta in
    that it generates an output directory in the metadata store and passes it
    to the metadata processing func so that outputs (e.g., preview images or
    metadata files) can be saved.

    Parameters
    ----------
    df_id: int
        ID of Datafile instance to process.
    schema_name: str
        Namespace of the Schema which describes the ParameterNames.
    overwrite: Boolean (default: False)
        Specifies whether to overwrite any existing parametersets for
        this datafile.
    **kwargs
        Extra options forwarded unchanged to get_meta.

    Returns
    -------
    None
    """
    from .metadata import get_meta

    # Skip entirely if another worker already holds the lock for this file.
    if not acquire_datafile_lock(df_id):
        return

    # Need to start a JVM in each thread
    check_and_start_jvm()

    try:
        javabridge.attach()
        log4j.basic_config()
        schema = Schema.objects.get(namespace__exact=schema_name)
        df = DataFile.objects.get(id=df_id)

        # BUG FIX: the original used .get(), which returns a single object
        # (and raises MultipleObjectsReturned when several parametersets
        # exist) yet then tried to iterate over it. Use the queryset so the
        # overwrite branch actually deletes every existing parameterset.
        existing_psets = DatafileParameterSet.objects.filter(
            schema=schema, datafile=df)
        if existing_psets.exists():
            if not overwrite:
                logger.warning("Parametersets for %s already exist.",
                               df.filename)
                return
            logger.warning("Overwriting parametersets for %s", df.filename)
            for ps in existing_psets:
                delete_old_parameterset(ps)

        dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                            verified=True).first()
        if dfo is None:
            # No verified copy exists anywhere; nothing to extract from.
            logger.warning("No verified DataFileObject for datafile %s",
                           df_id)
            return
        input_file_path = dfo.get_full_path()

        # Mirror the DFO's storage layout under METADATA_STORE_PATH, with a
        # per-datafile subdirectory, so get_meta can write preview outputs.
        output_rel_path = os.path.join(
            os.path.dirname(urlparse.urlparse(dfo.uri).path), str(df.id))
        output_path = os.path.join(settings.METADATA_STORE_PATH,
                                   output_rel_path)

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        logger.debug("Processing file: %s", input_file_path)
        metadata_params = get_meta(input_file_path, output_path, **kwargs)
        if not metadata_params:
            logger.debug("No metadata to save")
            return

        # One parameterset per metadata group returned by get_meta.
        for sm in metadata_params:
            ps = DatafileParameterSet(schema=schema, datafile=df)
            ps.save()

            logger.debug("Saving parameters for: %s", input_file_path)
            save_parameters(schema, ps, sm)
    except Exception as err:
        logger.exception(err)
    finally:
        # Always release the lock and detach the JVM, even on early return.
        release_datafile_lock(df_id)
        javabridge.detach()
# Esempio n. 18
# 0
def process_meta(df_id, schema_name, overwrite=False, **kwargs):
    """Extract metadata from a Datafile using the get_meta function and save the
    outputs as DatafileParameters.

    Parameters
    ----------
    df_id: int
        ID of Datafile instance to process.
    schema_name: str
        Namespace of the Schema which describes the ParameterNames.
    overwrite: boolean (default: False)
        Specifies whether to overwrite any existing parametersets for
        this datafile.
    **kwargs
        Extra options forwarded unchanged to get_meta.

    Returns
    -------
    None
    """
    from .metadata import get_meta

    # Skip entirely if another worker already holds the lock for this file.
    if not acquire_datafile_lock(df_id):
        return

    # Need to start a JVM in each thread
    check_and_start_jvm()

    try:
        javabridge.attach()
        log4j.basic_config()
        schema = Schema.objects.get(namespace__exact=schema_name)
        df = DataFile.objects.get(id=df_id)

        # BUG FIX: the original used .get(), which returns a single object
        # (and raises MultipleObjectsReturned when several parametersets
        # exist) yet then tried to iterate over it. Use the queryset so the
        # overwrite branch actually deletes every existing parameterset.
        existing_psets = DatafileParameterSet.objects.filter(
            schema=schema, datafile=df)
        if existing_psets.exists():
            if not overwrite:
                logger.warning("Parametersets for %s already exist.",
                               df.filename)
                return
            logger.warning("Overwriting parametersets for %s", df.filename)
            for ps in existing_psets:
                delete_old_parameterset(ps)

        dfo = DataFileObject.objects.filter(datafile__id=df.id,
                                            verified=True).first()
        if dfo is None:
            # No verified copy exists anywhere; nothing to extract from.
            logger.warning("No verified DataFileObject for datafile %s",
                           df_id)
            return
        input_file_path = dfo.get_full_path()

        logger.debug("Processing file: %s", input_file_path)
        metadata_params = get_meta(input_file_path, **kwargs)

        if not metadata_params:
            logger.debug("No metadata to save")
            return

        # One parameterset per metadata group returned by get_meta.
        for sm in metadata_params:
            ps = DatafileParameterSet(schema=schema, datafile=df)
            ps.save()

            logger.debug("Saving parameters for: %s", input_file_path)
            save_parameters(schema, ps, sm)
    except Exception as err:
        logger.exception(err)
    finally:
        # Always release the lock and detach the JVM, even on early return.
        release_datafile_lock(df_id)
        javabridge.detach()