Ejemplo n.º 1
0
 def _create_experiment(self, data=None):
     """Create and save an experiment with authors, datasets and files.

     ``data`` is first passed through ``self._data_to_post``; the result
     supplies the title, institution name, description, the id of the
     creating user and a ``', '``-separated string of author names.
     Two fixed datasets, each with its own files, are attached to the
     experiment. Returns the saved experiment.
     """
     from tardis.tardis_portal import models, forms
     from os.path import basename
     from django.contrib.auth.models import User

     post = self._data_to_post(data)

     experiment = models.Experiment(
         title=post['title'],
         institution_name=post['institution_name'],
         description=post['description'],
         created_by=User.objects.get(id=post['created_by']),
     )
     experiment.save()

     # One Author_Experiment row per name, ordered as listed.
     author_names = post['authors'].split(', ')
     for position, author_name in enumerate(author_names):
         models.Author_Experiment(experiment=experiment,
                                  author=author_name,
                                  order=position).save()

     # Dataset description -> relative paths of the files it contains.
     files_by_dataset = {
         'first one': ['file/another.py'],
         'second': ['second_ds/file.py', 'second_ds/file1.py']
     }
     for description, paths in files_by_dataset.items():
         dataset = models.Dataset(description=description,
                                  experiment=experiment)
         dataset.save()
         for path in paths:
             models.Dataset_File(url='file://' + path,
                                 dataset=dataset,
                                 filename=basename(path)).save()

     return experiment
Ejemplo n.º 2
0
    def test_authors(self):
        """Authors attached to an experiment can be read back from it.

        Two experiments are created; the first gets one author, the
        second gets three with non-consecutive ``order`` values. Only the
        second experiment's author set is inspected.
        """
        from tardis.tardis_portal import models

        def new_experiment(title):
            # Build and persist an experiment owned by the test user.
            experiment = models.Experiment(title=title,
                                           institution_name='monash',
                                           created_by=self.user,
                                           )
            experiment.save()
            return experiment

        def new_author(experiment, name, order):
            # Build and persist one author link for the experiment.
            link = models.Author_Experiment(experiment=experiment,
                                            author=name,
                                            order=order)
            link.save()
            return link

        # An unrelated experiment whose author must not show up below.
        new_author(new_experiment('test exp2'), 'nigel', 0)

        exp = new_experiment('test exp1')
        ae1 = new_author(exp, 'steve', 100)
        ae2 = new_author(exp, 'russell', 1)
        ae3 = new_author(exp, 'uli', 50)

        authors = exp.author_experiment_set.all()

        # confirm that there are 3 authors
        self.assertEqual(len(authors), 3)
        self.assertTrue(ae1 in authors)
        self.assertTrue(ae2 in authors)
        # 'uli' has the middle order value (50) and is expected at index 1;
        # presumably the relation is sorted by ``order`` -- confirm against
        # the model definition.
        self.assertTrue(ae3 == authors[1])
Ejemplo n.º 3
0
 def _update_authors(self, data):
     """Rebuild the author forms from the authors found in ``data``.

     Each entry produced by ``self._parse_authors(data)`` either replaces
     the form already held at its ``'order'`` index (re-using that form's
     model instance) or, when no form exists at that index, is appended
     as a form bound to a fresh ``models.Author_Experiment``.
     """
     for author_data in self._parse_authors(data):
         try:
             position = author_data['order']
             # Re-bind the existing instance at this position to new data.
             current_form = self.author_experiments[position]
             replacement = Author_Experiment(
                 data=author_data,
                 instance=current_form.instance)
             self.author_experiments[position] = replacement
         except IndexError:
             # Nothing at that position yet: add a brand-new author form.
             new_form = Author_Experiment(
                 data=author_data,
                 instance=models.Author_Experiment())
             self.author_experiments.append(new_form)
Ejemplo n.º 4
0
    def _fill_authors(self, authors):
        """Create one author form per name and expose them as a field.

        For each name in ``authors`` (in order) a form is appended to
        ``self.author_experiments``. The form wraps the author record
        already stored at that index for ``self.instance`` when there is
        one, otherwise a new ``models.Author_Experiment`` attached to
        ``self.instance``. Finally ``self.fields['authors']`` is set to a
        comma-separated multi-value field built from every form's
        ``'author'`` field.
        """
        existing = (self.instance.author_experiment_set.all()
                    if self.instance else [])

        for position, name in enumerate(authors):
            try:
                record = existing[position]
            except IndexError:
                # No stored author at this position: start a new record.
                record = models.Author_Experiment()
                record.experiment = self.instance
            form = Author_Experiment(
                data={'author': name, 'order': position},
                instance=record)
            self.author_experiments.append(form)

        author_fields = [form.fields['author']
                         for form in self.author_experiments]
        self.fields['authors'] = MultiValueCommaSeparatedField(
            author_fields,
            widget=CommaSeparatedInput())
Ejemplo n.º 5
0
def createhpcexperiment(request, user, dl):
    """Create an experiment from an uploaded HPC metadata file.

    The upload is spooled to a temporary file and read line by line; each
    non-blank line must have the form ``key~value``. The keys ``Name``,
    ``Experiment``, ``Facility``, ``Description``, ``FolderName``,
    ``Counter`` and ``Package`` are required. Values are stored as read
    (not stripped); only the folder description strips its parts.

    :param request: not used by this function.
    :param user: the owning user; its ``pk`` and ``id`` are read.
    :param dl: the upload; must provide ``chunks()`` yielding its content.
    :returns: tuple ``(experiment id, experiment, folder description)``
        where the folder description is
        ``Facility.Package.FolderName.Counter``.
    :raises ValueError: if a non-blank line contains no ``~``.
    :raises KeyError: if a required metadata key is missing.
    :raises Experiment.DoesNotExist: if the freshly saved experiment
        cannot be read back.
    """
    from django.contrib.auth.models import User
    # NOTE(review): this name is not used below; the import is kept in case
    # it is relied on for its import-time effects -- confirm and remove.
    from tardis.tardis_portal.views import _registerExperimentDocument
    import tempfile

    metadata = {}
    # The context manager guarantees the temporary file is closed even if
    # reading or parsing the upload raises.
    with tempfile.TemporaryFile() as temp:
        for chunk in dl.chunks():
            temp.write(chunk)
        temp.seek(0)

        for line in temp:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            # Split on the first '~' only so values may contain '~'.
            key, value = line.split('~', 1)
            metadata[key] = value

    author = metadata['Name']
    title = metadata['Experiment']
    ds_desc = metadata['Facility']
    desc = metadata['Description']
    fname = metadata['FolderName']
    counter = metadata['Counter']
    package = metadata['Package']

    exp = Experiment(
        title=title,
        institution_name="RMIT University",
        description=desc,
        created_by=User.objects.get(id=user.pk),
    )
    exp.save()
    eid = exp.id
    # Store the author for the dataset
    ae = models.Author_Experiment(experiment=exp, author=author, order='1')
    ae.save()

    try:
        e = Experiment.objects.get(pk=eid)
    except Experiment.DoesNotExist:
        logger.exception(
            'Experiment for eid %i in CreateHPCExperiment does not exist',
            eid)
        # Without the experiment there is nothing to attach the ACL to;
        # propagate instead of failing later on an unbound name.
        raise

    # Give the uploading user full, owner-level access to the experiment.
    acl = ExperimentACL(experiment=e,
                        pluginId=django_user,
                        entityId=str(user.id),
                        canRead=True,
                        canWrite=True,
                        canDelete=True,
                        isOwner=True,
                        aclOwnershipType=ExperimentACL.OWNER_OWNED)
    acl.save()

    folder_desc = "%s.%s.%s.%s" % (ds_desc.strip(), package.strip(),
                                   fname.strip(), counter.strip())
    logger.debug('folder_desc = %s' % folder_desc)
    return eid, exp, folder_desc
Ejemplo n.º 6
0
    def endElementNS(self, name, qname):
        """Handle the end of an XML element while parsing a METS document.

        Depending on which element is closing and on the parser state
        flags held on ``self``, this either resets a "grab"/"in" flag,
        saves the experiment or dataset collected so far to the database,
        or saves the parameter sets (and, for datafiles, the datafile
        itself) for the metadata block that just ended.

        :param name: the element name; index 1 is used as the element
            name without its namespace.
        :param qname: not used by this method.
        :raises Exception: if a second experiment parameter set is
            encountered for the same metadata block.
        """
        # just get the element name without the namespace
        elName = name[1]

        if elName == 'dmdSec':
            self.inDmdSec = False
            # if we currently processing an experiment structure, let's
            # save the institution value before we finalise the experiment
            if self.processExperimentStruct:
                self.metsObject.institution = self.institution

                # let's save the experiment in the DB
                if self.tardisExpId:
                    self.modelExperiment = models.Experiment.objects.get(
                        pk=self.tardisExpId)
                else:
                    self.modelExperiment = models.Experiment()
                # NOTE(review): when tardisExpId is falsy this assigns that
                # falsy value as the id of the new experiment; presumably
                # it is None so the database picks the id -- confirm.
                self.modelExperiment.id = self.tardisExpId
                self.modelExperiment.url = self.metsObject.url
                self.modelExperiment.approved = True
                self.modelExperiment.title = self.metsObject.title
                self.modelExperiment.institution_name = \
                                            self.metsObject.institution
                self.modelExperiment.description = self.metsObject.description
                self.modelExperiment.start_time = self.metsObject.start_time
                self.modelExperiment.end_time = self.metsObject.end_time
                self.modelExperiment.created_by = self.createdBy

                self.modelExperiment.save()

                # expose the saved experiment's id through the holder
                self.holder.experimentDatabaseId = self.modelExperiment.id

                # save the authors, numbering them in the order they appear
                x = 0
                for author in self.metsObject.authors:
                    author_experiment = models.Author_Experiment(
                        experiment=self.modelExperiment,
                        author=author, order=x)
                    author_experiment.save()
                    x = x + 1

            elif self.processDatasetStruct:
                # let's save the dataset in the DB
                self.modelDataset = models.Dataset(
                    description=self.metsObject.title,
                    immutable=settings.IMMUTABLE_METS_DATASETS)
                # the dataset is saved before the experiment is added to
                # its ``experiments`` relation, then saved again
                self.modelDataset.save()
                self.modelDataset.experiments.add(self.modelExperiment)
                self.modelDataset.save()

                # let's also save the modelDataset in a dictionary so that we
                # can look it up easily later on when we start processing
                # the datafiles.
                self.datasetLookupDict[self.metsObject.id] = self.modelDataset

            # the section is finished: drop the object being built and
            # clear both structure flags
            self.metsObject = None

            self.processExperimentStruct = False
            self.processDatasetStruct = False

        elif elName == 'title' and self.inDmdSec:
            self.grabTitle = False

        elif elName == 'startTime' and self.processExperimentStruct:
            self.grabStartTime = False

        elif elName == 'endTime' and self.processExperimentStruct:
            self.grabEndTime = False

        elif elName == 'url' and self.processExperimentStruct:
            self.grabExperimentUrl = False

        elif elName == 'abstract' and self.processExperimentStruct:
            self.grabAbstract = False

        elif elName == 'name' and self.processExperimentStruct:
            self.inName = False

        elif elName == 'namePart' and self.inName:
            self.grabMightBeAuthor = False

        elif elName == 'roleTerm' and self.inName:
            self.grabRoleTerm = False
            self.mightBeAuthor = None

        elif elName == 'name' and self.inInstitution:
            self.grabInstitution = False

        elif elName == 'agent':
            self.inInstitution = False

        elif elName == 'amdSec':
            # we're done processing the metadata entries
            self.inAmdSec = False

            # let's reset the cached experiment model object
            self.modelExperiment = None

        elif elName == 'techMD' and self.inAmdSec:
            self.inTechMd = False
            self.metadataId = None
            self.metsObject = None
            self.processMetadata = False

        elif elName == 'xmlData' and self.inTechMd:
            self.inXmlData = False

        elif elName != self.xmlDataChildElement and \
                self.customHandler is not None:
            # any other element closing while a custom handler is active
            # is passed on to that handler
            self.customHandler.endElement(elName)

        elif elName == self.xmlDataChildElement and self.inXmlData:

            # when a custom handler collected the metadata, use its result
            if self.customHandler is not None:
                self.tempMetadataHolder = self.customHandler.metadataDict

            try:
                schema = models.Schema.objects.get(
                    namespace__exact=self.elementNamespace)

                # get the associated parameter names for the given schema
                parameterNames = \
                    models.ParameterName.objects.filter(
                    schema__namespace__exact=schema.namespace).order_by('id')

                # let's create a trigger holder which we can use to check
                # if we still need to create another parameterset entry in the
                # DB
                createParamSetFlag = {'experiment': True, 'dataset': True,
                                      'datafile': True}
                datasetParameterSet = None
                datafileParameterSet = None

                if self.metadataId  in self.holder.metadataMap:
                    for metsObject in self.holder.metadataMap[self.metadataId]:
                        self.metsObject = metsObject

                        # dispatch on the class name of the METS object
                        metsObjectClassName = self.metsObject.__class__.__name__

                        if metsObjectClassName == 'Experiment':
                            if createParamSetFlag['experiment']:
                                # create a new parameter set for the metadata
                                parameterSet = \
                                    models.ExperimentParameterSet(
                                    schema=schema,
                                    experiment=self.modelExperiment)
                                parameterSet.save()

                                # now let's process the experiment parameters
                                for parameterName in parameterNames:
                                    if parameterName.name in \
                                            self.tempMetadataHolder:
                                        parameterValues = self.tempMetadataHolder[
                                            parameterName.name]
                                        self._saveParameters('ExperimentParameter',
                                                parameterName, parameterValues,
                                                parameterSet)

                                createParamSetFlag['experiment'] = False

                            else:
                                # this is not even allowed as there's only going
                                # to be one experiment per METS file
                                raise Exception('forbidden state!')

                        elif metsObjectClassName == 'Dataset':
                            if createParamSetFlag['dataset']:
                                dataset = self.datasetLookupDict[
                                    self.metsObject.id]

                                # create a new parameter set for the
                                # dataset metadata
                                datasetParameterSet = \
                                    models.DatasetParameterSet(schema=schema,
                                    dataset=dataset)

                                datasetParameterSet.save()

                                # now let's process the dataset parameters
                                for parameterName in parameterNames:
                                    if parameterName.name in \
                                            self.tempMetadataHolder:
                                        parameterValues = self.tempMetadataHolder[
                                            parameterName.name]
                                        self._saveParameters('DatasetParameter',
                                                parameterName, parameterValues,
                                                datasetParameterSet)

                                # disable creation for the next visit
                                createParamSetFlag['dataset'] = False

                        elif metsObjectClassName == 'Datafile':
                            # this will be a good time to save the
                            # "hard" metadata of this datafile so that
                            # when we start adding "soft" metadata
                            # parameters to it, we already have an
                            # entry for it in the DB

                            # look up the dataset this file belongs to
                            thisFilesDataset = self.datasetLookupDict[
                                self.metsObject.dataset.id]

                            # also check if the file already exists
                            # (matched on filename and size)
                            datafile = thisFilesDataset.dataset_file_set.filter(
                                filename=self.metsObject.name, size=self.metsObject.size)

                            if datafile.count() == 0:
                                size = self.metsObject.size

                                # store a falsy size as 0
                                if not self.metsObject.size:
                                    size = 0


                                def checksum(obj, type_):
                                    """Return obj's checksum if valid for type_.

                                    The lower-cased checksum is returned
                                    only when ``obj.checksumType`` equals
                                    ``type_``, the value is hexadecimal and
                                    its length matches the digest size of
                                    the hashlib algorithm named by
                                    ``type_``; otherwise ``''``.
                                    """
                                    # Check if the checksum is of type
                                    if obj.checksumType != type_:
                                        return ''
                                    checksum = obj.checksum.lower()
                                    # Ensure the checksum is hexadecimal
                                    if not re.match('[0-9a-f]+$', checksum):
                                        return ''
                                    # Get algorithm
                                    # (e.g. 'SHA-512' is looked up as
                                    # hashlib.sha512)
                                    # NOTE(review): the bare except below
                                    # hides every error, not only a missing
                                    # algorithm.
                                    try:
                                        name = type_.replace('-','').lower()
                                        alg = getattr(hashlib, name)
                                    except:
                                        return ''
                                    # Check checksum is the correct length
                                    # (two hex characters per digest byte)
                                    # NOTE(review): alg('') passes a str;
                                    # hashlib on Python 3 requires bytes.
                                    hex_length = alg('').digest_size * 2
                                    if hex_length != len(checksum):
                                        return ''
                                    # Should be valid checksum of given type
                                    return checksum

                                sync_url, proto = get_sync_url_and_protocol(
                                                    self.syncRootDir,
                                                    self.metsObject.url)

                                self.modelDatafile = models.Dataset_File(
                                    dataset=thisFilesDataset,
                                    filename=self.metsObject.name,
                                    url=sync_url,
                                    size=size,
                                    md5sum=checksum(self.metsObject, 'MD5'),
                                    sha512sum=checksum(self.metsObject,
                                                       'SHA-512'),
                                    protocol=proto)

                                self.modelDatafile.save()
                            else:
                                # re-use the datafile that is already stored
                                self.modelDatafile = thisFilesDataset.dataset_file_set.get(
                                    filename=self.metsObject.name, size=self.metsObject.size)
                            # TODO: we need to note here that we are
                            # only creating a datafile entry in the DB
                            # for files that have corresponding
                            # metadata. if we are to create a file
                            # entry for files with no metadata, we'll
                            # need to get the unaccessed datafiles
                            # from datasetLookupDict.

                            if createParamSetFlag['datafile']:
                                # create a new parameter set for the metadata
                                datafileParameterSet = \
                                    models.DatafileParameterSet(schema=schema,
                                    dataset_file=self.modelDatafile)
                                datafileParameterSet.save()

                                # now let's process the datafile parameters
                                for parameterName in parameterNames:
                                    if parameterName.name in \
                                            self.tempMetadataHolder:
                                        parameterValues = self.tempMetadataHolder[
                                            parameterName.name]
                                        self._saveParameters('DatafileParameter',
                                                parameterName, parameterValues,
                                                datafileParameterSet)
                                createParamSetFlag['datafile'] = False

            except models.Schema.DoesNotExist:
                # no schema is registered for this namespace: log and skip
                logger.warning('unsupported schema being ingested ' +
                    self.elementNamespace)

            # reset the current xmlData child element so that if a new
            # parameter set is read, we can process it again
            self.xmlDataChildElement = None
            self.customHandler = None

        elif elName == self.parameterName and \
                self.xmlDataChildElement is not None:

            # reset self.parameterName to None so the next parameter can be
            # processed
            self.parameterName = None