Beispiel #1
0
    def format_attribute(self):
        """
        Build the biosample representation of this ontology term.

        The module-level ``format_attribute`` helper is called with this
        object's ``label`` as value and its ``term`` as ontology term, and
        its result is returned as is, for example::

            [{
                'value': 'submitter',
                'terms': [{'url': 'http://www.ebi.ac.uk/efo/EFO_0001741'}]
            }]

        The fixed part of the URI link is read from the ``Ontology`` record
        matching the ``library_name`` class attribute. When that attribute is
        ``None`` a warning is logged and ``OBO_URL`` is used instead.
        """

        if self.library_name is not None:
            # resolve the base URI from the ontology table
            ontology = Ontology.objects.get(library_name=self.library_name)
            base_uri = ontology.library_uri

        else:
            # no ontology configured: fall back to the default URL
            logger.warning("library_name not defined")
            base_uri = OBO_URL

        return format_attribute(
            value=self.label, library_uri=base_uri, terms=self.term)
Beispiel #2
0
    def format_attribute(self):
        """Return the formatted ``mapped_breed`` attribute with its ontology.

        ``None`` is returned when either ``mapped_breed`` or
        ``mapped_breed_term`` is empty."""

        # both the breed name and its ontology term are required
        if not (self.mapped_breed and self.mapped_breed_term):
            return None

        # the base URI comes from the ontology matching library_name
        ontology = Ontology.objects.get(library_name=self.library_name)
        base_uri = ontology.library_uri

        return format_attribute(
            value=self.mapped_breed,
            library_uri=base_uri,
            terms=self.mapped_breed_term)
Beispiel #3
0
    def get_attributes(self):
        """Return the animal attributes in the form biosample needs.

        The attributes returned by the parent class are extended with the
        animal specific ones, then every entry whose value is ``None`` is
        dropped.

        Returns:
            dict: attribute name -> formatted attribute
        """

        attributes = super().get_attributes()

        # TODO: how to model derived from (mother/father)?
        # HINT: Ideally, I could retrieve an ontology id for countries
        # NOTE: birth_date is passed as it is (not converted to string),
        # otherwise a NULL date will not be filtered out below
        attributes.update({
            "Material": format_attribute(
                value="organism", terms="OBI_0100026"),
            'Supplied breed': format_attribute(
                value=self.breed.supplied_breed),
            'EFABIS Breed country': format_attribute(
                value=self.breed.country.label),
            'Mapped breed': self.breed.format_attribute(),
            'Sex': self.sex.format_attribute(),
            'Birth date': format_attribute(
                value=self.birth_date, units="YYYY-MM-DD"),
            "Birth location": format_attribute(value=self.birth_location),
            "Birth location longitude": format_attribute(
                value=self.birth_location_longitude,
                units="decimal degrees"),
            "Birth location latitude": format_attribute(
                value=self.birth_location_latitude,
                units="decimal degrees"),
            "Birth location accuracy": format_attribute(
                value=self.get_birth_location_accuracy_display()),
        })

        # keep only the attributes which have a value
        return {
            name: formatted
            for name, formatted in attributes.items()
            if formatted is not None
        }
Beispiel #4
0
    def get_attributes(self):
        """Define the attributes shared by Animal and samples.

        Needs to be called inside the Animal/sample ``get_attributes``
        method. Keys are the names used in metadata rules. Empty values are
        not filtered out here.

        Returns:
            dict: a dictionary object
        """

        attributes = {
            'Data source ID': format_attribute(value=self.data_source_id),
            'Alternative id': format_attribute(value=self.alternative_id),
            # HINT: this is a mandatory biosample field: could be removed
            # from attributes?
            'Description': format_attribute(value=self.description),
            "Project": format_attribute(value="IMAGE"),
            # to retrieve where this sample belongs
            "IMAGE submission id": format_attribute(
                value=self.submission.id),
            'Submission title': format_attribute(
                value=self.submission.title),
            'Submission description': format_attribute(
                value=self.submission.description),
            'Person last name': format_attribute(
                value=self.owner.last_name),
            'Person initial': format_attribute(
                value=self.person.initials),
            'Person first name': format_attribute(
                value=self.owner.first_name),
            'Person email': format_attribute(
                value="mailto:%s" % self.owner.email),
            'Person affiliation': format_attribute(
                value=self.person.affiliation.name),
            'Person role': self.person.role.format_attribute(),
            'Organization name': format_attribute(
                value=self.organization.name),
            'Organization address': format_attribute(
                value=self.organization.address),
            'Organization uri': format_attribute(
                value=self.organization.URI),
            'Organization country':
                self.organization.country.format_attribute(),
            'Organization role': self.organization.role.format_attribute(),
        }

        # the publication is optional: add its DOI only when available
        if self.publication:
            attributes['Publication DOI'] = format_attribute(
                value=self.publication.doi)

        attributes.update({
            'Gene bank name': format_attribute(value=self.gene_bank_name),
            'Gene bank country': self.gene_bank_country.format_attribute(),
            'Data source type': format_attribute(
                value=self.submission.get_datasource_type_display()),
            'Data source version': format_attribute(
                value=self.submission.datasource_version),
            'Species': self.specie.format_attribute(),
        })

        return attributes
Beispiel #5
0
def get_orphan_samples(limit=None):
    """
    Iterate over the BioSamples orphaned records selected by ``ORPHAN_QS``
    (ordered by team name and id), get minimal data from BioSamples and
    build a payload which can be used to patch a BioSamples id with a new
    submission in order to remove the record: the release date read from
    BioSamples is shifted forward by ``RELEASE_TIMEDELTA``.

    Records for which BioSamples replies with a 403 status are logged as
    errors and skipped.

    Parameters
    ----------
    limit : int, optional
        Maximum number of orphaned records to process. ``None`` (or any
        other false value) means no limit.

    Yields
    ------
    dict
        A dictionary with three keys: ``data`` (the payload to submit to
        BioSamples in order to remove the record), ``team`` (the team of the
        orphaned sample) and ``sample`` (the orphaned sample itself).
    """

    with requests.Session() as session:
        # get all biosamples candidate for a removal. Pay attention that
        # could be removed from different users
        qs = ORPHAN_QS.order_by('team__name', 'id')

        if limit:
            # slice the queryset so the limit is applied by the database:
            # iterating a queryset (even through islice) evaluates it all
            qs = qs[:limit]

        for orphan_sample in qs:
            # define the url I need to check
            url = "/".join([BIOSAMPLE_URL, orphan_sample.biosample_id])

            # read data from url. The body is decoded before testing the
            # status since the error branch reads its details from it
            response = session.get(url)
            data = response.json()

            # check status
            if response.status_code == 403:
                # let logging format the message only when it is emitted
                logger.error(
                    "Error for %s (%s): %s",
                    orphan_sample.biosample_id,
                    data['error'],
                    data['message'])

                # this sample seems already removed
                continue

            # I need a new data dictionary to submit
            new_data = {
                # I suppose the accession exists, since I found this sample
                # using accession [biosample.id]
                'accession': data.get(
                    'accession', orphan_sample.biosample_id),
                'alias': data['name'],
                'title': data['characteristics']['title'][0]['text'],
                # this will be the most important attribute
                'releaseDate': str(
                    parse_date(data['releaseDate']) + RELEASE_TIMEDELTA),
                'taxonId': data['taxId'],
                # the taxon name is the label of the specie whose term
                # ends with the taxonomy id
                'taxon': DictSpecie.objects.get(
                    term__endswith=data['taxId']).label,
                # set project again
                'attributes': {
                    "Project": format_attribute(value="IMAGE"),
                },
                'description': "Removed by InjectTool",
            }

            # return new biosample data
            yield {
                'data': new_data,
                'team': orphan_sample.team,
                'sample': orphan_sample,
            }
Beispiel #6
0
    def get_attributes(self):
        """Return the sample attributes in the form biosample needs.

        The attributes returned by the parent class are extended with the
        sample specific ones, then every entry whose value is ``None`` is
        dropped.

        Returns:
            dict: attribute name -> formatted attribute
        """

        attributes = super().get_attributes()

        # NOTE: collection_date is passed as it is (not converted to
        # string), otherwise a NULL date will not be filtered out below
        attributes.update({
            "Material": format_attribute(
                value="specimen from organism", terms="OBI_0001479"),
            # The data source id or alternative id of the animal from which
            # the sample was collected (see Animal.to_biosample())
            'Derived from': format_attribute(value=self.animal.name),
            "Specimen collection protocol": format_attribute(
                value=self.protocol),
            'Collection date': format_attribute(
                value=self.collection_date, units="YYYY-MM-DD"),
            'Collection place': format_attribute(
                value=self.collection_place),
            "Collection place longitude": format_attribute(
                value=self.collection_place_longitude,
                units="decimal degrees"),
            "Collection place latitude": format_attribute(
                value=self.collection_place_latitude,
                units="decimal degrees"),
            "Collection place accuracy": format_attribute(
                value=self.get_collection_place_accuracy_display()),
        })

        # optional relations: these point to a term dictionary table and
        # are formatted only when they are set
        if self.organism_part:
            attributes['Organism part'] = (
                self.organism_part.format_attribute())

        if self.developmental_stage:
            attributes['Developmental stage'] = (
                self.developmental_stage.format_attribute())

        if self.physiological_stage:
            attributes['Physiological stage'] = (
                self.physiological_stage.format_attribute())

        attributes.update({
            'Animal age at collection': format_attribute(
                value=self.animal_age_at_collection,
                units=self.get_animal_age_at_collection_units_display()),
            'Availability': format_attribute(value=self.availability),
            'Sample storage': format_attribute(
                value=self.get_storage_display()),
            'Sample storage processing': format_attribute(
                value=self.get_storage_processing_display()),
            'Sampling to preparation interval': format_attribute(
                value=self.preparation_interval,
                units=self.get_preparation_interval_units_display()),
        })

        # keep only the attributes which have a value
        return {
            name: formatted
            for name, formatted in attributes.items()
            if formatted is not None
        }