Esempio n. 1
0
def map_form_to_schemas(extraInfo, publication):
    """Save submitted form fields as experiment parameters of a publication.

    ``extraInfo`` maps form ids to dicts of field values; each form dict
    names its schema namespace under the ``'schema'`` key.  For every form
    whose schema exists, a new ``ExperimentParameterSet`` is saved for
    ``publication`` and every other field whose key matches a
    ``ParameterName`` of that schema is saved as an ``ExperimentParameter``
    (numeric types via ``float()``, string-like types via ``str()``).

    Forms naming an unknown schema, fields with no matching parameter name
    and parameters of any other type are skipped.  A ``ValueError`` raised
    by ``float()`` and a ``KeyError`` for a form without a ``'schema'``
    entry are not caught here.
    """
    # The form ids are not needed, so only the values are iterated.
    # ``values()``/``items()`` work on Python 2 and Python 3 dicts alike,
    # whereas ``iteritems()`` exists on Python 2 only.
    for form in extraInfo.values():
        try:  # Ignore form if no schema exists with this name
            schema = Schema.objects.get(namespace=form['schema'])
        except Schema.DoesNotExist:
            continue
        parameter_set = ExperimentParameterSet(
            schema=schema, experiment=publication)
        parameter_set.save()
        for key, value in form.items():
            if key == 'schema':
                # The schema entry selects the parameter set; it is not a
                # parameter itself.
                continue
            try:  # Ignore field if parameter name (key) doesn't match
                parameter_name = ParameterName.objects.get(
                    schema=schema, name=key)
                if parameter_name.isNumeric():
                    parameter = ExperimentParameter(
                        name=parameter_name,
                        parameterset=parameter_set,
                        numerical_value=float(value))
                elif parameter_name.isLongString() or \
                        parameter_name.isString() or \
                        parameter_name.isURL() or \
                        parameter_name.isLink() or \
                        parameter_name.isFilename():
                    parameter = ExperimentParameter(
                        name=parameter_name,
                        parameterset=parameter_set,
                        string_value=str(value))
                else:
                    # Shouldn't happen, but here in case the parameter type
                    # is non-standard
                    continue
                parameter.save()
            except ParameterName.DoesNotExist:
                pass
Esempio n. 2
0
def map_form_to_schemas(extraInfo, publication):
    """Convert publication form data into experiment parameter sets.

    Each value of ``extraInfo`` is a form dict whose ``'schema'`` entry is
    a schema namespace.  When that schema exists, a fresh
    ``ExperimentParameterSet`` linking it to ``publication`` is saved, and
    every remaining form entry whose key is a ``ParameterName`` of the
    schema becomes a saved ``ExperimentParameter``: numeric names store
    ``float(value)``, string-like names store ``str(value)``.

    Unknown schemas, unknown parameter names and non-standard parameter
    types are skipped.  Conversion errors from ``float()`` and a missing
    ``'schema'`` key propagate to the caller.
    """
    # ``items()`` is used instead of ``iteritems()`` because the latter is
    # only available on Python 2 dicts.
    for _form_id, form in extraInfo.items():
        try:  # Ignore form if no schema exists with this name
            schema = Schema.objects.get(namespace=form['schema'])
        except Schema.DoesNotExist:
            continue
        parameter_set = ExperimentParameterSet(schema=schema,
                                               experiment=publication)
        parameter_set.save()
        for key, value in form.items():
            if key != 'schema':
                try:  # Ignore field if parameter name (key) doesn't match
                    parameter_name = ParameterName.objects.get(schema=schema,
                                                               name=key)
                    if parameter_name.isNumeric():
                        parameter = ExperimentParameter(
                            name=parameter_name,
                            parameterset=parameter_set,
                            numerical_value=float(value))
                    elif parameter_name.isLongString() or \
                            parameter_name.isString() or \
                            parameter_name.isURL() or \
                            parameter_name.isLink() or \
                            parameter_name.isFilename():
                        parameter = ExperimentParameter(
                            name=parameter_name,
                            parameterset=parameter_set,
                            string_value=str(value))
                    else:
                        # Shouldn't happen, but here in case the parameter
                        # type is non-standard
                        continue
                    parameter.save()
                except ParameterName.DoesNotExist:
                    pass
Esempio n. 3
0
 def _save_doi(self, doi):
     """Store ``doi`` as a string-valued experiment parameter.

     The parameter is named ``self.doi_name`` and placed in the parameter
     set returned by ``self._get_or_create_doi_parameterset()``.

     Returns ``doi`` unchanged.
     """
     doi_parameter = ExperimentParameter(
         parameterset=self._get_or_create_doi_parameterset(),
         name=self.doi_name,
         string_value=doi)
     doi_parameter.save()
     return doi
Esempio n. 4
0
def _update(parameterset, name, string_value):
    """Set, create or remove the string parameter ``name`` of a set.

    A truthy ``string_value`` is written to the parameter found for
    (``parameterset``, ``name``), which is created first when the lookup
    comes back falsy.  A falsy ``string_value`` deletes the parameter that
    was found, if any.
    """
    existing = _get_or_none(ExperimentParameter.objects.all(),
                            parameterset=parameterset, name=name)
    if not string_value:
        # Nothing to store: drop whatever is there and stop.
        if existing:
            existing.delete()
        return
    if existing:
        target = existing
    else:
        target = ExperimentParameter(parameterset=parameterset, name=name)
    target.string_value = string_value
    target.save()
Esempio n. 5
0
    def prevent_expiry(self):
        """Flag the experiment as never expiring and make it non-public.

        Deletes every parameter currently in ``self.parameterset``, saves a
        single ``self.never_expire`` parameter with the string value
        ``'True'`` and then saves ``self.experiment`` with ``public`` set
        to ``False``.

        Raises ``Exception`` when ``self.parameterset`` is falsy.
        """
        if not self.parameterset:
            raise Exception('incorrectly initialised, call with create=True')

        # Clear the set before writing the single never-expire marker.
        self.parameterset.experimentparameter_set.all().delete()
        never_expire_marker = ExperimentParameter(
            name=self.never_expire,
            string_value='True',
            parameterset=self.parameterset)
        never_expire_marker.save()

        self.experiment.public = False
        self.experiment.save()
Esempio n. 6
0
def _update(parameterset, name, string_value):
    """Synchronise one string-valued experiment parameter with a value.

    With a truthy ``string_value`` the parameter matching ``parameterset``
    and ``name`` is reused (or a new one is instantiated when the lookup
    returns nothing truthy), given the value and saved.  With a falsy
    ``string_value`` a matching parameter is deleted when present.
    """
    found = _get_or_none(ExperimentParameter.objects.all(),
                         parameterset=parameterset,
                         name=name)
    if string_value:
        record = found or ExperimentParameter(parameterset=parameterset,
                                              name=name)
        record.string_value = string_value
        record.save()
    elif found:
        found.delete()
Esempio n. 7
0
    def prevent_expiry(self):
        """Flag the experiment as never expiring and make it non-public.

        Deletes every parameter currently in ``self.parameterset`` (any
        stored expiry date included), saves a single ``self.never_expire``
        parameter with the string value ``'True'`` and then saves
        ``self.experiment`` with ``public`` set to ``False``.

        Raises ``Exception`` when ``self.parameterset`` is falsy.
        """
        # This is a routine progress message, so it is logged at debug
        # level rather than at fatal/critical severity.
        logger.debug('preventing expiry')
        if not self.parameterset:
            raise Exception('incorrectly initialised, call with create=True')
        # delete any current expiry dates
        params = self.parameterset.experimentparameter_set
        params.all().delete()
        # set never_expires = True
        param = ExperimentParameter(name=self.never_expire,
                                    string_value='True',
                                    parameterset=self.parameterset)
        param.save()

        self.experiment.public = False
        self.experiment.save()
Esempio n. 8
0
 def add_if_missing(parameterset, name, string_value=None,
                    numerical_value=None, datetime_value=None):
     """Create the parameter called ``name`` unless the set already has it.

     When no ``ExperimentParameter`` with that parameter name exists in
     ``parameterset``, the ``ParameterName`` is looked up in the set's
     schema and a new parameter carrying the three given values is saved.
     An existing parameter is left untouched.
     """
     try:
         ExperimentParameter.objects.get(
             parameterset=parameterset, name__name=name)
     except ExperimentParameter.DoesNotExist:
         pass
     else:
         # Already present: nothing to add.
         return

     name_record = ParameterName.objects.get(
         schema=parameterset.schema, name=name)
     new_param = ExperimentParameter(name=name_record,
                                     parameterset=parameterset)
     new_param.string_value = string_value
     new_param.numerical_value = numerical_value
     new_param.datetime_value = datetime_value
     new_param.save()
Esempio n. 9
0
    def set_expiry(self, date_string):
        """Replace the set's parameters with an expiry date and set visibility.

        All parameters in ``self.parameterset`` are deleted first; then
        ``date_string`` is parsed with the format ``'%Y/%m/%d'`` and saved
        as the ``self.expiry_date`` parameter.  ``self.experiment`` is saved
        with ``public`` set to ``True`` when the date lies strictly before
        today and to ``False`` otherwise.

        Raises ``Exception`` when ``self.parameterset`` is falsy.  A parse
        error from ``strptime`` propagates after the deletion has happened.
        """
        if not self.parameterset:
            raise Exception('incorrectly initialised, call with create=True')

        import datetime

        self.parameterset.experimentparameter_set.all().delete()
        expires_on = datetime.datetime.strptime(date_string, '%Y/%m/%d')
        expiry_record = ExperimentParameter(
            name=self.expiry_date,
            datetime_value=expires_on,
            parameterset=self.parameterset)
        expiry_record.save()

        # An expiry date already in the past means the experiment is public.
        self.experiment.public = expires_on.date() < datetime.date.today()
        self.experiment.save()
Esempio n. 10
0
 def add_if_missing(parameterset,
                    name,
                    string_value=None,
                    numerical_value=None,
                    datetime_value=None):
     """Add an experiment parameter only when the set does not have it yet.

     Looks for an ``ExperimentParameter`` in ``parameterset`` whose
     parameter name is ``name``.  If the lookup reports that none exists,
     the ``ParameterName`` is fetched from the set's schema and a new
     parameter holding the supplied string, numerical and datetime values
     is saved; otherwise nothing is changed.
     """
     already_present = True
     try:
         ExperimentParameter.objects.get(name__name=name,
                                         parameterset=parameterset)
     except ExperimentParameter.DoesNotExist:
         already_present = False
     if already_present:
         return

     definition = ParameterName.objects.get(name=name,
                                            schema=parameterset.schema)
     created = ExperimentParameter(name=definition,
                                   parameterset=parameterset)
     created.string_value = string_value
     created.numerical_value = numerical_value
     created.datetime_value = datetime_value
     created.save()
Esempio n. 11
0
 def _save_party_refs(self, party, party_relation):
     """Save party and party relation information as parameters on the
     experiment.

     Fetches the RIF-CS party schema (creating it when absent), saves a
     new parameter set for the experiment identified by
     ``self.experiment_id`` and stores ``party.pk`` under ``party_id`` and
     ``party_relation`` under ``relationtocollection_id``.
     """
     namespace = "http://rmit.edu.au/rif-cs/party/1.0/"
     logger.debug("saving party")
     try:
         schema = Schema.objects.get(namespace__exact=namespace)
     except Schema.DoesNotExist:
         logger.debug('Schema ' + namespace + ' does not exist. Creating.')
         schema = Schema(namespace=namespace)
         schema.save()

     exp = Experiment.objects.get(pk=self.experiment_id)
     party_id_param = self._make_param(schema=schema,
                                       name="party_id",
                                       paramtype=ParameterName.NUMERIC)
     relation_param = self._make_param(schema=schema,
                                       name="relationtocollection_id",
                                       paramtype=ParameterName.STRING)
     parameterset = ExperimentParameterSet(schema=schema, experiment=exp)
     parameterset.save()

     def purge(param_name):
         # Delete, one by one, the parameters of this name in the set.
         # NOTE(review): the set was created just above, so this query
         # presumably never matches anything -- confirm the intent.
         for stale in ExperimentParameter.objects.filter(
                 name=param_name,
                 parameterset=parameterset,
                 parameterset__experiment=exp):
             stale.delete()

     purge(party_id_param)
     ExperimentParameter(parameterset=parameterset,
                         name=party_id_param,
                         numerical_value=party.pk).save()

     purge(relation_param)
     ExperimentParameter(parameterset=parameterset,
                         name=relation_param,
                         string_value=party_relation).save()
Esempio n. 12
0
def synchrotron_search_epn(publication):
    """Copy EPN and beamline values from related experiments to a publication.

    *** Synchrotron specific ***

    Finds the other experiments that share a dataset with ``publication``,
    carry a parameter set of the synchrotron experiment schema and are not
    publications themselves.  For each of them a new parameter set is saved
    on ``publication`` holding copies of its ``EPN`` and ``beamline``
    string values.  Nothing happens when the schema does not exist.
    """
    try:
        epn_schema = Schema.objects.get(
            namespace='http://www.tardis.edu.au/schemas/as/'
                      'experiment/2010/09/21')

        def source_value(source, param_name):
            # String value of the named schema parameter on ``source``.
            return ExperimentParameter.objects.get(
                name__name=param_name,
                name__schema=epn_schema,
                parameterset__experiment=source).string_value

        def store_copy(target_set, param_name, value):
            # Save ``value`` under the named schema parameter in the set.
            duplicate = ExperimentParameter(
                name=ParameterName.objects.get(
                    name=param_name, schema=epn_schema),
                parameterset=target_set)
            duplicate.string_value = value
            duplicate.save()

        shared_datasets = Dataset.objects.filter(experiments=publication)
        related = Experiment.objects.filter(
            datasets__in=shared_datasets,
            experimentparameterset__schema=epn_schema).exclude(
            pk=publication.pk).distinct()
        sources = [candidate for candidate in related
                   if not candidate.is_publication()]

        for source in sources:
            epn = source_value(source, 'EPN')
            beamline = source_value(source, 'beamline')

            target_set = ExperimentParameterSet(
                schema=epn_schema,
                experiment=publication)
            target_set.save()
            store_copy(target_set, 'EPN', epn)
            store_copy(target_set, 'beamline', beamline)
    except Schema.DoesNotExist:
        pass
Esempio n. 13
0
def synchrotron_search_epn(publication):
    """Attach beamline/EPN information from linked datasets to a publication.

    *** Synchrotron specific ***

    Every non-publication experiment that shares a dataset with
    ``publication`` and has a parameter set of the synchrotron experiment
    schema contributes one new parameter set on ``publication`` containing
    copies of its ``EPN`` and ``beamline`` string values.  A missing
    schema is ignored silently.
    """
    copied_names = ('EPN', 'beamline')
    try:
        synch_epn_schema = Schema.objects.get(
            namespace='http://www.tardis.edu.au/schemas/as/'
                      'experiment/2010/09/21')
        datasets = Dataset.objects.filter(experiments=publication)
        synch_experiments = Experiment.objects.filter(
            datasets__in=datasets,
            experimentparameterset__schema=synch_epn_schema).exclude(
            pk=publication.pk).distinct()
        non_publications = [
            s for s in synch_experiments if not s.is_publication()]
        for exp in non_publications:
            # Read both source values before anything is written.
            collected = []
            for param_name in copied_names:
                source_param = ExperimentParameter.objects.get(
                    name__name=param_name,
                    name__schema=synch_epn_schema,
                    parameterset__experiment=exp)
                collected.append((param_name, source_param.string_value))

            epn_parameter_set = ExperimentParameterSet(
                schema=synch_epn_schema,
                experiment=publication)
            epn_parameter_set.save()
            for param_name, copied_value in collected:
                clone = ExperimentParameter(
                    name=ParameterName.objects.get(
                        name=param_name, schema=synch_epn_schema),
                    parameterset=epn_parameter_set)
                clone.string_value = copied_value
                clone.save()
    except Schema.DoesNotExist:
        pass
 def _save_party_refs(self, party, party_relation):
     """Save party and party relation information as parameters on the
     experiment.

     The RIF-CS party schema is fetched or created, a new parameter set
     is saved for the experiment with primary key ``self.experiment_id``,
     and two parameters are written into it: ``party_id`` (numeric,
     ``party.pk``) and ``relationtocollection_id`` (string,
     ``party_relation``).
     """
     namespace = "http://rmit.edu.au/rif-cs/party/1.0/"
     logger.debug("saving party")
     try:
         schema = Schema.objects.get(namespace__exact=namespace)
     except Schema.DoesNotExist:
         logger.debug('Schema ' + namespace + ' does not exist. Creating.')
         schema = Schema(namespace=namespace)
         schema.save()
     exp = Experiment.objects.get(pk=self.experiment_id)
     party_id_param = self._make_param(
         schema=schema, name="party_id", paramtype=ParameterName.NUMERIC)
     relation_param = self._make_param(
         schema=schema, name="relationtocollection_id",
         paramtype=ParameterName.STRING)
     parameterset = ExperimentParameterSet(schema=schema, experiment=exp)
     parameterset.save()

     # Lookup shared by both "delete existing values" queries below.
     in_this_set = dict(parameterset=parameterset,
                        parameterset__experiment=exp)

     # party_id: remove existing values, then store the party's key.
     for old_param in ExperimentParameter.objects.filter(
             name=party_id_param, **in_this_set):
         old_param.delete()
     party_value = ExperimentParameter(parameterset=parameterset,
                                       name=party_id_param,
                                       numerical_value=party.pk)
     party_value.save()

     # relationtocollection_id: same procedure with the relation string.
     for old_param in ExperimentParameter.objects.filter(
             name=relation_param, **in_this_set):
         old_param.delete()
     relation_value = ExperimentParameter(parameterset=parameterset,
                                          name=relation_param,
                                          string_value=party_relation)
     relation_value.save()
 def mint_key(self, experiment):
     """Create and store a new random key for ``experiment``.

     Requests a 64-character string from ``self._make_rand_string`` and
     saves it as the ``self.key_name`` parameter in the experiment's
     parameter set for ``self.schema`` (the set is created when missing).

     Returns the key string read back from the database, or ``None`` when
     no such parameter can be retrieved.
     """
     number_chars = 64
     key_value = self._make_rand_string(number_chars)
     eps, _ = ExperimentParameterSet.objects.\
         get_or_create(experiment=experiment, schema=self.schema)
     ep = ExperimentParameter(parameterset=eps,
                              name=self.key_name,
                              string_value=key_value)
     ep.save()
     # Need to reload key to verify that it actually been saved, because
     # destination may ingest METS code and miss this parameter if it has
     # been delayed.  If we can't retrieve parameter, then pass back None,
     # which is an error state at the destination
     #
     # The lookup is by the experiment itself: ``eps`` is a parameter set,
     # not an experiment, so it is not a valid value for
     # ``parameterset__experiment``.
     doi_params = ExperimentParameter.objects.filter(
         name=self.key_name,
         parameterset__schema=self.schema,
         parameterset__experiment=experiment)
     if doi_params.count() >= 1:
         return doi_params[0].string_value
     return None
 def mint_key(self, experiment):
     """Mint a random key, store it on ``experiment`` and return it.

     A 64-character string is requested from ``self._make_rand_string``
     and saved as the ``self.key_name`` parameter inside the experiment's
     ``self.schema`` parameter set, which is created if it does not exist.

     Returns the key string as re-read from the database, or ``None`` when
     the parameter cannot be retrieved.
     """
     number_chars = 64
     key_value = self._make_rand_string(number_chars)
     eps, _ = ExperimentParameterSet.objects.get_or_create(
         experiment=experiment, schema=self.schema)
     ep = ExperimentParameter(parameterset=eps,
                              name=self.key_name,
                              string_value=key_value)
     ep.save()
     # Need to reload key to verify that it actually been saved, because
     # destination may ingest METS code and miss this parameter if it has
     # been delayed.  If we can't retrieve parameter, then pass back None,
     # which is an error state at the destination
     #
     # ``parameterset__experiment`` must be compared with the experiment;
     # the parameter set ``eps`` is a different model.
     stored_keys = ExperimentParameter.objects.filter(
         name=self.key_name,
         parameterset__schema=self.schema,
         parameterset__experiment=experiment)
     if stored_keys.count() < 1:
         return None
     return stored_keys[0].string_value
Esempio n. 17
0
    def test_parameter(self):
        """Check the markup ``get()`` returns for image filename parameters.

        Builds one experiment, dataset and datafile, gives each of them a
        parameter whose ``ParameterName`` has the ``FILENAME`` data type
        and ``'image/jpg'`` units, and compares ``get()`` with the expected
        image link for the datafile, dataset and experiment parameter.
        """
        def saved_schema(namespace, schema_type):
            # Create and persist a schema of the given type.
            schema = Schema(namespace=namespace, type=schema_type)
            schema.save()
            return schema

        def saved_filename_parname(schema):
            # Create and persist the FILENAME parameter name for a schema.
            parname = ParameterName(schema=schema,
                                    name='name',
                                    full_name='full_name',
                                    units='image/jpg',
                                    data_type=ParameterName.FILENAME)
            parname.save()
            return parname

        experiment = Experiment(
            title='test exp1',
            institution_name='Australian Synchrotron',
            approved=True,
            created_by=self.user,
            public_access=Experiment.PUBLIC_ACCESS_NONE,
        )
        experiment.save()

        dataset = Dataset(description="dataset description")
        dataset.save()
        dataset.experiments.add(experiment)
        dataset.save()

        datafile = DataFile(dataset=dataset,
                            filename='file.txt',
                            size=42,
                            md5sum='bogus')
        datafile.save()

        datafile_schema = saved_schema(
            'http://www.cern.ch/felzmann/schema1.xml', Schema.DATAFILE)
        dataset_schema = saved_schema(
            'http://www.cern.ch/felzmann/schema2.xml', Schema.DATASET)
        experiment_schema = saved_schema(
            'http://www.cern.ch/felzmann/schema3.xml', Schema.EXPERIMENT)

        datafile_parname = saved_filename_parname(datafile_schema)
        dataset_parname = saved_filename_parname(dataset_schema)
        experiment_parname = saved_filename_parname(experiment_schema)

        datafile_parset = DatafileParameterSet(schema=datafile_schema,
                                               datafile=datafile)
        datafile_parset.save()

        dataset_parset = DatasetParameterSet(schema=dataset_schema,
                                             dataset=dataset)
        dataset_parset.save()

        experiment_parset = ExperimentParameterSet(schema=experiment_schema,
                                                   experiment=experiment)
        experiment_parset.save()

        with self.settings(METADATA_STORE_PATH=os.path.dirname(__file__)):
            filename = 'test.jpg'

            datafile_parameter = DatafileParameter(
                name=datafile_parname,
                parameterset=datafile_parset,
                string_value=filename)
            datafile_parameter.save()

            dataset_parameter = DatasetParameter(
                name=dataset_parname,
                parameterset=dataset_parset,
                string_value=filename)
            dataset_parameter.save()

            experiment_parameter = ExperimentParameter(
                name=experiment_parname,
                parameterset=experiment_parset,
                string_value=filename)
            experiment_parameter.save()

            expected_datafile_html = (
                "<a href='/display/DatafileImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatafileImage/load/%i/' /></a>"  # noqa
                % (datafile_parameter.id, datafile_parameter.id))
            self.assertEqual(expected_datafile_html,
                             datafile_parameter.get())

            expected_dataset_html = (
                "<a href='/display/DatasetImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/DatasetImage/load/%i/' /></a>"  # noqa
                % (dataset_parameter.id, dataset_parameter.id))
            self.assertEqual(expected_dataset_html,
                             dataset_parameter.get())

            expected_experiment_html = (
                "<a href='/display/ExperimentImage/load/%i/' target='_blank'><img style='width: 300px;' src='/display/ExperimentImage/load/%i/' /></a>"  # noqa
                % (experiment_parameter.id, experiment_parameter.id))
            self.assertEqual(expected_experiment_html,
                             experiment_parameter.get())
Esempio n. 18
0
def populate_pdb_pub_records():
    """Refresh PDB-derived parameters on non-draft PDB publications.

    Selects experiments that have parameter sets in both the PDB schema
    and the publication root schema but none in the publication draft
    schema.  A publication is refreshed when it has no ``pdb-last-sync``
    parameter, or when that timestamp plus ``PDB_REFRESH_INTERVAL`` lies
    in the past.

    A refresh upper-cases the stored PDB id if needed, loads the entry
    through ``PDBCifHelper`` and then:

    * adds the standard PDB parameters that are still missing,
    * adds sequence parameter sets when the publication has none,
    * deletes and recreates the citation parameter sets,
    * removes this publication's ``pdb-embargo`` parameter, if set, and
    * stores the current time as ``pdb-last-sync``.

    A publication whose PDB entry raises ``CifFile.StarError`` is skipped.
    """
    PUB_SCHEMA = getattr(settings, 'PUBLICATION_SCHEMA_ROOT',
                         default_settings.PUBLICATION_SCHEMA_ROOT)
    PUB_SCHEMA_DRAFT = getattr(settings, 'PUBLICATION_DRAFT_SCHEMA',
                               default_settings.PUBLICATION_DRAFT_SCHEMA)
    PDB_SCHEMA = getattr(settings, 'PDB_PUBLICATION_SCHEMA_ROOT',
                         default_settings.PDB_PUBLICATION_SCHEMA_ROOT)
    # Resolved once here instead of at every use inside the loop.
    PDB_SEQ_SCHEMA = getattr(settings, 'PDB_SEQUENCE_PUBLICATION_SCHEMA',
                             default_settings.PDB_SEQUENCE_PUBLICATION_SCHEMA)
    PDB_CIT_SCHEMA = getattr(settings, 'PDB_CITATION_PUBLICATION_SCHEMA',
                             default_settings.PDB_CITATION_PUBLICATION_SCHEMA)
    publications = Experiment.objects \
        .filter(experimentparameterset__schema__namespace=PDB_SCHEMA) \
        .filter(experimentparameterset__schema__namespace=PUB_SCHEMA) \
        .exclude(experimentparameterset__schema__namespace=PUB_SCHEMA_DRAFT) \
        .distinct()

    last_update_parameter_name = ParameterName.objects.get(
        name='pdb-last-sync', schema__namespace=PUB_SCHEMA)

    def add_if_missing(parameterset,
                       name,
                       string_value=None,
                       numerical_value=None,
                       datetime_value=None):
        # Create the named parameter in ``parameterset`` only when the
        # set does not contain it yet; existing values are left alone.
        try:
            ExperimentParameter.objects.get(name__name=name,
                                            parameterset=parameterset)
        except ExperimentParameter.DoesNotExist:
            param_name = ParameterName.objects.get(name=name,
                                                   schema=parameterset.schema)
            param = ExperimentParameter(name=param_name,
                                        parameterset=parameterset)
            param.string_value = string_value
            param.numerical_value = numerical_value
            param.datetime_value = datetime_value
            param.save()

    for pub in publications:
        try:
            # try to get the last update time for the PDB data
            pdb_last_update_parameter = ExperimentParameter.objects.get(
                parameterset__schema__namespace=PUB_SCHEMA,
                name=last_update_parameter_name,
                parameterset__experiment=pub)
            last_update = pdb_last_update_parameter.datetime_value
            needs_update = last_update + \
                getattr(settings,
                        'PDB_REFRESH_INTERVAL',
                        default_settings.PDB_REFRESH_INTERVAL) \
                < timezone.now()

        except ExperimentParameter.DoesNotExist:
            # if the PDB last update time parameter doesn't exist,
            # we definitely need to update the data and create a last
            # update entry
            needs_update = True
            pdb_last_update_parameter = None

        if not needs_update:
            continue

        # 1. get the PDB info
        pdb_parameter_set = ExperimentParameterSet.objects.get(
            schema__namespace=PDB_SCHEMA,
            experiment=pub)
        pdb_id_parameter = ExperimentParameter.objects.get(
            name__name='pdb-id', parameterset=pdb_parameter_set)
        pdb_id = pdb_id_parameter.string_value
        # 1a. cosmetic change of case for PDB ID, if entered incorrectly
        if pdb_id != pdb_id.upper():
            pdb_id_parameter.string_value = pdb_id.upper()
            pdb_id_parameter.save()

        try:
            # 2. fetch the info from pdb.org
            # (the id is passed as stored, before any case correction)
            pdb = PDBCifHelper(pdb_id)

            # 3. insert all standard pdb parameters
            add_if_missing(pdb_parameter_set,
                           'title',
                           string_value=pdb.get_pdb_title())
            add_if_missing(pdb_parameter_set,
                           'url',
                           string_value=pdb.get_pdb_url())
            # The numeric fields may fail with ValueError; such a failure
            # is logged and the remaining fields are still processed.
            numeric_fields = (
                ('resolution', pdb.get_resolution),
                ('r-value', pdb.get_obs_r_value),
                ('r-free', pdb.get_free_r_value),
            )
            for field_name, read_value in numeric_fields:
                try:
                    add_if_missing(pdb_parameter_set,
                                   field_name,
                                   numerical_value=read_value())
                except ValueError:
                    logger.error('PDB field "%s" could not be set for '
                                 'publication Id %i \n %s' %
                                 (field_name, pub.id,
                                  traceback.format_exc()))

            add_if_missing(pdb_parameter_set,
                           'space-group',
                           string_value=pdb.get_spacegroup())
            add_if_missing(pdb_parameter_set,
                           'unit-cell',
                           string_value=pdb.get_unit_cell())

            # 4. insert sequence info (lazy checking)
            pdb_seq_parameter_sets = ExperimentParameterSet.objects.filter(
                schema__namespace=PDB_SEQ_SCHEMA,
                experiment=pub)
            if not pdb_seq_parameter_sets.exists():
                # insert sequences
                for seq in pdb.get_sequence_info():
                    seq_parameter_set = ExperimentParameterSet(
                        schema=Schema.objects.get(
                            namespace=PDB_SEQ_SCHEMA),
                        experiment=pub)
                    seq_parameter_set.save()
                    add_if_missing(seq_parameter_set,
                                   'organism',
                                   string_value=seq['organism'])
                    add_if_missing(seq_parameter_set,
                                   'expression-system',
                                   string_value=seq['expression_system'])
                    add_if_missing(seq_parameter_set,
                                   'sequence',
                                   string_value=seq['sequence'])

            # 5. insert/update citation info (aggressive)
            ExperimentParameterSet.objects.filter(
                schema__namespace=PDB_CIT_SCHEMA,
                experiment=pub).delete()
            for citation in pdb.get_citations():
                cit_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=PDB_CIT_SCHEMA),
                    experiment=pub)
                cit_parameter_set.save()
                add_if_missing(cit_parameter_set,
                               'title',
                               string_value=citation['title'])
                add_if_missing(cit_parameter_set,
                               'authors',
                               string_value='; '.join(citation['authors']))
                add_if_missing(cit_parameter_set,
                               'journal',
                               string_value=citation['journal'])
                add_if_missing(cit_parameter_set,
                               'volume',
                               string_value=citation['volume'])
                add_if_missing(cit_parameter_set,
                               'page-range',
                               string_value='-'.join([
                                   citation['page_first'],
                                   citation['page_last']
                               ]))
                add_if_missing(cit_parameter_set,
                               'doi',
                               string_value='http://dx.doi.org/' +
                               citation['doi'])

            # 6. Remove the PDB embargo if set, since the update has
            # occurred and therefore the PDB must have been released.
            # The lookup is restricted to this publication so that the
            # embargo of another publication is never touched.
            try:
                ExperimentParameter.objects.get(
                    name__name='pdb-embargo',
                    parameterset__schema__namespace=PUB_SCHEMA,
                    parameterset__experiment=pub).delete()
            except ExperimentParameter.DoesNotExist:
                pass

            # 7. Set the last update parameter to be now
            if pdb_last_update_parameter is None:
                pub_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=PUB_SCHEMA),
                    experiment=pub)
                pub_parameter_set.save()
                pdb_last_update_parameter = ExperimentParameter(
                    name=last_update_parameter_name,
                    parameterset=pub_parameter_set,
                    datetime_value=timezone.now())
            else:
                pdb_last_update_parameter.datetime_value = timezone.now()
            pdb_last_update_parameter.save()

        except CifFile.StarError:
            # PDB is either unavailable or invalid
            # (maybe notify the user somehow?)
            continue
Esempio n. 19
0
def populate_pdb_pub_records():
    """Refresh PDB-derived metadata on released PDB publications.

    Selects every experiment that has a parameter set in both the PDB
    publication schema and the publication root schema, and none in the
    publication draft schema.  For each one whose 'pdb-last-sync' parameter
    is missing, unset, or older than PDB_REFRESH_INTERVAL, the function:

    1. reads the 'pdb-id' parameter (upper-casing the stored value),
    2. builds a PDBCifHelper for that ID,
    3. adds the standard PDB parameters that are not already present,
    4. adds sequence parameter sets if the publication has none,
    5. deletes and recreates the citation parameter sets,
    6. removes this publication's 'pdb-embargo' parameter, and
    7. stamps 'pdb-last-sync' with the current time.

    A CifFile.StarError raised while processing a publication skips that
    publication (its 'pdb-last-sync' is not updated); changes already saved
    for it are not rolled back by this function.
    """
    PUB_SCHEMA = getattr(settings, 'PUBLICATION_SCHEMA_ROOT',
                         default_settings.PUBLICATION_SCHEMA_ROOT)
    PUB_SCHEMA_DRAFT = getattr(settings, 'PUBLICATION_DRAFT_SCHEMA',
                               default_settings.PUBLICATION_DRAFT_SCHEMA)
    PDB_SCHEMA = getattr(settings, 'PDB_PUBLICATION_SCHEMA_ROOT',
                         default_settings.PDB_PUBLICATION_SCHEMA_ROOT)
    # Settings do not change while the task runs, so resolve the remaining
    # ones once instead of on every loop iteration.
    PDB_SEQ_SCHEMA = getattr(settings, 'PDB_SEQUENCE_PUBLICATION_SCHEMA',
                             default_settings.PDB_SEQUENCE_PUBLICATION_SCHEMA)
    PDB_CIT_SCHEMA = getattr(settings, 'PDB_CITATION_PUBLICATION_SCHEMA',
                             default_settings.PDB_CITATION_PUBLICATION_SCHEMA)
    refresh_interval = getattr(settings, 'PDB_REFRESH_INTERVAL',
                               default_settings.PDB_REFRESH_INTERVAL)

    publications = Experiment.objects \
        .filter(experimentparameterset__schema__namespace=PDB_SCHEMA) \
        .filter(experimentparameterset__schema__namespace=PUB_SCHEMA) \
        .exclude(experimentparameterset__schema__namespace=PUB_SCHEMA_DRAFT) \
        .distinct()

    last_update_parameter_name = ParameterName.objects.get(
        name='pdb-last-sync',
        schema__namespace=PUB_SCHEMA)

    def add_if_missing(parameterset, name, string_value=None,
                       numerical_value=None, datetime_value=None):
        """Create parameter ``name`` in ``parameterset`` unless one exists.

        Existing parameters are never modified.  Raises
        ParameterName.DoesNotExist if the parameter set's schema has no
        parameter called ``name``.
        """
        already_present = ExperimentParameter.objects.filter(
            name__name=name, parameterset=parameterset).exists()
        if already_present:
            return
        param_name = ParameterName.objects.get(
            name=name, schema=parameterset.schema)
        param = ExperimentParameter(name=param_name,
                                    parameterset=parameterset)
        param.string_value = string_value
        param.numerical_value = numerical_value
        param.datetime_value = datetime_value
        param.save()

    for pub in publications:
        try:
            # try to get the last update time for the PDB data
            pdb_last_update_parameter = ExperimentParameter.objects.get(
                parameterset__schema__namespace=PUB_SCHEMA,
                name=last_update_parameter_name,
                parameterset__experiment=pub
            )
        except ExperimentParameter.DoesNotExist:
            # if the PDB last update time parameter doesn't exist,
            # we definitely need to update the data and create a last
            # update entry
            needs_update = True
            pdb_last_update_parameter = None
        else:
            last_update = pdb_last_update_parameter.datetime_value
            # A parameter row with no timestamp counts as "never synced"
            # rather than failing on ``None + interval``.
            needs_update = (
                last_update is None or
                last_update + refresh_interval < timezone.now())

        if not needs_update:
            continue

        # 1. get the PDB info
        pdb_parameter_set = ExperimentParameterSet.objects.get(
            schema__namespace=PDB_SCHEMA,
            experiment=pub)
        pdb_id_parameter = ExperimentParameter.objects.get(
            name__name='pdb-id',
            parameterset=pdb_parameter_set)
        pdb_id = pdb_id_parameter.string_value
        # 1a. cosmetic change of case for PDB ID, if entered incorrectly
        # (only the stored value is changed; the helper below still receives
        # the ID exactly as it was read)
        if pdb_id != pdb_id.upper():
            pdb_id_parameter.string_value = pdb_id.upper()
            pdb_id_parameter.save()

        try:
            # 2. fetch the info from pdb.org
            pdb = PDBCifHelper(pdb_id)

            # 3. insert all standard pdb parameters
            add_if_missing(pdb_parameter_set, 'title',
                           string_value=pdb.get_pdb_title())
            add_if_missing(pdb_parameter_set, 'url',
                           string_value=pdb.get_pdb_url())

            # The numeric fields may fail with ValueError; log and carry on
            # so one bad field does not block the rest.
            numeric_fields = (
                ('resolution', pdb.get_resolution),
                ('r-value', pdb.get_obs_r_value),
                ('r-free', pdb.get_free_r_value),
            )
            for field_name, read_value in numeric_fields:
                try:
                    add_if_missing(pdb_parameter_set, field_name,
                                   numerical_value=read_value())
                except ValueError:
                    logger.error(
                        'PDB field "%s" could not be set for '
                        'publication Id %i \n %s',
                        field_name, pub.id, traceback.format_exc())

            add_if_missing(pdb_parameter_set, 'space-group',
                           string_value=pdb.get_spacegroup())
            add_if_missing(pdb_parameter_set, 'unit-cell',
                           string_value=pdb.get_unit_cell())

            # 4. insert sequence info (lazy checking): only populated when
            # the publication has no sequence parameter sets at all
            has_sequences = ExperimentParameterSet.objects.filter(
                schema__namespace=PDB_SEQ_SCHEMA,
                experiment=pub).exists()
            if not has_sequences:
                # insert sequences
                for seq in pdb.get_sequence_info():
                    seq_parameter_set = ExperimentParameterSet(
                        schema=Schema.objects.get(namespace=PDB_SEQ_SCHEMA),
                        experiment=pub)
                    seq_parameter_set.save()
                    add_if_missing(seq_parameter_set, 'organism',
                                   string_value=seq['organism'])
                    add_if_missing(seq_parameter_set, 'expression-system',
                                   string_value=seq['expression_system'])
                    add_if_missing(seq_parameter_set, 'sequence',
                                   string_value=seq['sequence'])

            # 5. insert/update citation info (aggressive): existing citation
            # parameter sets are dropped and rebuilt from the helper's data
            ExperimentParameterSet.objects.filter(
                schema__namespace=PDB_CIT_SCHEMA,
                experiment=pub).delete()
            for citation in pdb.get_citations():
                cit_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=PDB_CIT_SCHEMA),
                    experiment=pub)
                cit_parameter_set.save()
                add_if_missing(cit_parameter_set, 'title',
                               string_value=citation['title'])
                add_if_missing(cit_parameter_set, 'authors',
                               string_value='; '.join(citation['authors']))
                add_if_missing(cit_parameter_set, 'journal',
                               string_value=citation['journal'])
                add_if_missing(cit_parameter_set, 'volume',
                               string_value=citation['volume'])
                add_if_missing(cit_parameter_set, 'page-range',
                               string_value='-'.join(
                                   [citation['page_first'],
                                    citation['page_last']]))
                add_if_missing(cit_parameter_set, 'doi',
                               string_value='http://dx.doi.org/' +
                                            citation['doi'])

            # 6. Remove the PDB embargo if set, since the update has
            # occurred and therefore the PDB must have been released.
            # The lookup is restricted to this publication: an unscoped
            # get() could delete another publication's embargo, or raise
            # MultipleObjectsReturned when several are embargoed.
            ExperimentParameter.objects.filter(
                name__name='pdb-embargo',
                parameterset__schema__namespace=PUB_SCHEMA,
                parameterset__experiment=pub).delete()

            # 7. Set the last update parameter to be now
            if pdb_last_update_parameter is None:
                pub_parameter_set = ExperimentParameterSet(
                    schema=Schema.objects.get(namespace=PUB_SCHEMA),
                    experiment=pub)
                pub_parameter_set.save()
                pdb_last_update_parameter = ExperimentParameter(
                    name=last_update_parameter_name,
                    parameterset=pub_parameter_set,
                    datetime_value=timezone.now())
            else:
                pdb_last_update_parameter.datetime_value = timezone.now()
            pdb_last_update_parameter.save()

        except CifFile.StarError:
            # PDB is either unavailable or invalid
            # (maybe notify the user somehow?)
            continue
Esempio n. 20
0
 def _save_doi(self, doi):
     """Persist ``doi`` as a new string-valued ExperimentParameter.

     The parameter is attached to the parameter set returned by
     ``self._get_or_create_doi_parameterset()`` under the parameter name
     held in ``self.doi_name``.  Returns ``doi`` unchanged.
     """
     doi_parameterset = self._get_or_create_doi_parameterset()
     doi_parameter = ExperimentParameter(
         parameterset=doi_parameterset,
         name=self.doi_name,
         string_value=doi)
     doi_parameter.save()
     return doi