Exemple #1
0
def fill_uid_breeds(submission_obj, template):
    """Create the DictBreed entries listed in an excel template.

    Each record returned by ``template.get_breed_records()`` is resolved to
    a DictSpecie and a DictCountry, which are then handed (together with the
    supplied breed name) to ``get_or_create_obj`` for DictBreed.

    Args:
        submission_obj: submission whose ``gene_bank_country.label`` is used
            as the language when resolving the specie.
        template: object exposing ``get_breed_records()``.
    """

    logger.info("fill_uid_breeds() started")

    # the gene bank country label is the language used for translations
    language = submission_obj.gene_bank_country.label

    for breed_record in template.get_breed_records():
        # species are given with latin names, but a common name could be
        # matched through the translation tables
        breed_specie = DictSpecie.get_specie_check_synonyms(
            species_label=breed_record.species,
            language=language)

        # the breed country is read from the record: it is not forced to be
        # the submission one, since data could come from other countries
        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        get_or_create_obj(
            DictBreed,
            supplied_breed=breed_record.supplied_breed,
            specie=breed_specie,
            country=breed_country)

    logger.info("fill_uid_breeds() completed")
Exemple #2
0
def fill_Organization():
    """Fill organization table from ``organization_list.csv``.

    The data file is read from the directory that contains this module and
    is parsed as a ``;`` delimited CSV whose rows have exactly three
    columns: id, name and country. For every row a DictCountry and an
    Organization (with the submitter role) are passed to
    ``get_or_create_obj``.

    The file is opened in a ``with`` block, so the handle is released even
    when a row or a database call raises.
    """

    base_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(base_dir, "organization_list.csv")

    Data = collections.namedtuple('Data', "id name country")

    # newline="" is what the csv module requires to handle line endings
    # (and newlines embedded in quoted fields) by itself
    with open(filename, newline="") as handle:
        reader = csv.reader(handle, delimiter=";")

        # get a submitter role
        submitter = fill_DictRoles()

        for row in map(Data._make, reader):
            # get a country object
            country = get_or_create_obj(DictCountry, label=row.country)

            # HINT: could be better to fix organization names in
            # organization_list?
            get_or_create_obj(Organization,
                              name=standardize_institute_name(row.name),
                              role=submitter,
                              country=country)
Exemple #3
0
def fill_uid_breeds(submission):
    """Fill the UID DictBreed model from every VBreedsSpecies row.

    Args:
        submission: submission instance; its ``gene_bank_country.label`` is
            the language used to resolve species synonyms.
    """

    logger.info("fill_uid_breeds() started")

    # language of this submission
    language = submission.gene_bank_country.label

    for breed_row in VBreedsSpecies.objects.all():
        # resolve the specie through its synonym: dictionary tables are
        # supposed to be already filled at this point
        breed_specie = DictSpecie.get_by_synonym(
            synonym=breed_row.ext_species,
            language=language)

        # country of the breed. Ideally the same of the submission (the
        # Italian cryoweb is supposed to contain italian breeds), however
        # data from other countries could be stored
        breed_country = DictCountry.objects.get(
            label=breed_row.efabis_country)

        # fetch the breed, creating it when needed
        get_or_create_obj(
            DictBreed,
            supplied_breed=breed_row.efabis_mcname,
            specie=breed_specie,
            country=breed_country)

    logger.info("fill_uid_breeds() completed")
Exemple #4
0
def fill_OrganismParts():
    """Register the manually curated organism part terms in DictUberon"""

    # (label, ontology term) pairs, annotated by hand
    curated_parts = (
        ('strand of hair', "UBERON_0001037"),
    )

    for part_label, part_term in curated_parts:
        get_or_create_obj(
            DictUberon,
            label=part_label,
            term=part_term,
            confidence=CURATED)
Exemple #5
0
def fill_uid_sample(record, animal, submission):
    """Create or update the UID Sample described by ``record``.

    Args:
        record: source record providing the sample attributes read below
            (sample type, body part, dates, protocol, availability, ...).
        animal: Animal instance the sample belongs to.
        submission: submission instance; its ``owner`` becomes the sample
            owner.

    Returns:
        whatever ``update_or_create_obj`` returns for the Sample model.
    """

    sample_type_name = record.sample_type_name.lower()
    body_part_name = record.body_part_name.lower()

    # decision rule proposed by sylvain: use the body part as organism part
    # unless it is not informative, in which case rely on the sample type
    if body_part_name in ("unknown", "not relevant"):
        organism_part_label = sample_type_name

    else:
        organism_part_label = body_part_name

    # organism parts need to be in lowercase (both labels were lowered above)
    organism_part = get_or_create_obj(DictUberon, label=organism_part_label)

    # animal age at collection is derived from the two parsed dates
    birth = parse_date(record.animal_birth_date)
    sampled = parse_date(record.sampling_date)
    animal_age_at_collection, time_units = image_timedelta(sampled, birth)

    # a publication is attached only when a reference is provided
    doi = record.sample_bibliographic_references
    publication = get_or_create_obj(Publication, doi=doi) if doi else None

    # attributes passed as defaults, to avoid collisions when updating data
    defaults = dict(
        # HINT: is a duplication of name. Can this be non-mandatory?
        alternative_id=record.sample_identifier,
        collection_date=record.sampling_date,
        protocol=record.sampling_protocol_url,
        organism_part=organism_part,
        storage=find_storage_type(record),
        availability=sanitize_url(record.sample_availability),
        animal_age_at_collection=animal_age_at_collection,
        animal_age_at_collection_units=time_units,
        publication=publication,
    )

    return update_or_create_obj(
        Sample,
        name=record.sample_identifier,
        animal=animal,
        owner=submission.owner,
        submission=submission,
        defaults=defaults)
Exemple #6
0
def fill_Countries():
    """Fill curated countries and return the default one (United Kingdom).

    The returned country is the one used as default language for
    translations.
    """

    # (label, term) pairs. The first one is the default country; Colombia
    # is listed since it is difficult to annotate with zooma
    curated_countries = (
        ('United Kingdom', 'NCIT_C17233'),
        ('Colombia', 'NCIT_C16449'),
    )

    countries = [
        get_or_create_obj(
            DictCountry,
            label=country_label,
            term=country_term,
            confidence=CURATED)
        for country_label, country_term in curated_countries
    ]

    # default language for translations
    return countries[0]
Exemple #7
0
def fill_SpeciesAndSynonyms():
    """Populate cryoweb dictionary tables (species and their synonyms).

    For each (common name, latin name) pair a DictSpecie is fetched or
    created, its general breed is refreshed from
    ``get_general_breed_by_species`` when that lookup returns something
    different from ``{}``, and a SpecieSynonym linking the common name to
    the specie is fetched or created for the default language.
    """

    # countries are inserted here: the default one acts as language
    language = fill_Countries()

    # cryoweb DE species and synonyms: common name -> latin name
    cryoweb = {
        'Cattle': 'Bos taurus',
        'Chicken': 'Gallus gallus',
        'Deer': 'Cervidae',
        'Duck (domestic)': 'Anas platyrhynchos',
        'Goat': 'Capra hircus',
        'Goose (domestic)': 'Anser anser',
        'Horse': 'Equus caballus',
        'Pig': 'Sus scrofa',
        'Rabbit': 'Oryctolagus cuniculus',
        'Sheep': 'Ovis aries',
        'Turkey': 'Meleagris gallopavo',
        'Rainbow trout': 'Oncorhynchus mykiss',
        'Goose': 'Anser anser',
        'Dog': 'Canis lupus familiaris',
    }

    for common_name, latin_name in cryoweb.items():
        dictspecie = get_or_create_obj(DictSpecie, label=latin_name)

        # look for a general breed of this specie
        general_breed = get_general_breed_by_species(latin_name)

        if general_breed != {}:
            breed_label = general_breed['text']
            # the term is the last piece of the full ontology path
            breed_term = general_breed['ontologyTerms'].rsplit("/", 1)[-1]

            # save only when the stored label is different
            if dictspecie.general_breed_label != breed_label:
                dictspecie.general_breed_label = breed_label
                dictspecie.general_breed_term = breed_term
                dictspecie.save()
                logger.info("Added general breed: %s" % (breed_label))

        get_or_create_obj(
            SpecieSynonym,
            dictspecie=dictspecie,
            language=language,
            word=common_name)
Exemple #8
0
def fill_Species():
    """Populate species table with manually curated species"""

    # (label, NCBI taxon term) pairs, all stored with CURATED confidence
    curated_species = (
        ('Crassostrea gigas', 'NCBITaxon_29159'),
        ('Equus asinus', 'NCBITaxon_9793'),
        ('Oncorhynchus mykiss', 'NCBITaxon_8022'),
        ('Canis lupus familiaris', 'NCBITaxon_9615'),
    )

    for specie_label, specie_term in curated_species:
        get_or_create_obj(
            DictSpecie,
            confidence=CURATED,
            label=specie_label,
            term=specie_term)
Exemple #9
0
def fill_DictSex():
    """Fill the DictSex table with male, female and unknown sex terms"""

    # (label, ontology term) pairs of the three DictSex objects
    sexes = (
        ('male', 'PATO_0000384'),
        ('female', 'PATO_0000383'),
        ('record of unknown sex', 'OBI_0000858'),
    )

    for sex_label, sex_term in sexes:
        get_or_create_obj(DictSex, label=sex_label, term=sex_term)
Exemple #10
0
def fill_uid_breed(record, language):
    """Fetch or create the DictBreed described by a crbanim record.

    Args:
        record: crbanim record providing ``species_latin_name``,
            ``country_of_origin`` (used as an alpha-2 country code) and
            ``breed_name``.
        language: language used when resolving the specie.

    Returns:
        the object returned by ``get_or_create_obj`` for DictBreed.
    """

    # species are given with latin names, but a common name could be
    # matched through the translation tables
    breed_specie = DictSpecie.get_specie_check_synonyms(
        species_label=record.species_latin_name,
        language=language)

    # translate the alpha-2 code into a country name using pycountry
    # NOTE(review): no handling here for a code pycountry doesn't know
    origin = pycountry.countries.get(alpha_2=record.country_of_origin)

    # country of the breed. Ideally the same of the submission, however
    # data from other countries could be stored
    breed_country = DictCountry.objects.get(label=origin.name)

    return get_or_create_obj(
        DictBreed,
        supplied_breed=record.breed_name,
        specie=breed_specie,
        country=breed_country)
Exemple #11
0
def fill_DictRoles():
    """Fill the DictRole table and return the submitter role"""

    # the submitter role is the one handed back to the caller
    submitter = get_or_create_obj(
        DictRole, label='submitter', term='EFO_0001741')

    # remaining (label, term) roles
    other_roles = (
        ('administrator', 'EFO_0009743'),
        ('clinician', 'EFO_0009740'),
        ('curator', 'EFO_0001733'),
        ('funder', 'EFO_0001736'),
        ('investigator', 'EFO_0001739'),
        ('technician', 'EFO_0009739'),
    )

    for role_label, role_term in other_roles:
        get_or_create_obj(DictRole, label=role_label, term=role_term)

    return submitter
Exemple #12
0
def fill_uid_animals(submission_obj, template):
    """Create or update the UID Animal objects listed in an excel template.

    Every record returned by ``template.get_animal_records()`` is resolved
    to its sex, specie, breed and (when provided) parents, then passed to
    ``update_or_create_obj``. Finally the "animal" ValidationSummary of the
    submission is fetched or created and its counts are reset.

    Args:
        submission_obj: submission owning the animals; its
            ``gene_bank_country.label`` is used as language.
        template: object exposing ``get_animal_records()`` and
            ``get_breed_from_animal()``.
    """

    logger.info("called fill_uid_animals()")

    # language of this submission
    language = submission_obj.gene_bank_country.label

    for animal_record in template.get_animal_records():
        # sex labels are matched case insensitively
        sex = DictSex.objects.get(label__iexact=animal_record.sex)

        # resolve specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_record.species,
            language=language)

        logger.debug("Found '%s' as specie" % (specie))

        # the breed is described in a different record of the template
        breed_record = template.get_breed_from_animal(animal_record)

        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        # the real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=breed_country)

        logger.debug("Selected breed is %s" % (breed))

        # parents are optional: they stay None unless an id is provided
        father = None
        mother = None

        if animal_record.father_id_in_data_source:
            logger.debug("Getting %s as father" % (
                animal_record.father_id_in_data_source))

            father = get_relationship(
                animal_record.animal_id_in_data_source,
                animal_record.father_id_in_data_source,
                breed,
                submission_obj.owner)

        if animal_record.mother_id_in_data_source:
            logger.debug("Getting %s as mother" % (
                animal_record.mother_id_in_data_source))

            mother = get_relationship(
                animal_record.animal_id_in_data_source,
                animal_record.mother_id_in_data_source,
                breed,
                submission_obj.owner)

        # translate the accuracy description into its value
        accuracy = ACCURACIES.get_value_by_desc(
            animal_record.birth_location_accuracy)

        # attributes passed as defaults, to avoid collisions when updating
        # data
        defaults = dict(
            alternative_id=animal_record.alternative_animal_id,
            description=animal_record.animal_description,
            sex=sex,
            father=father,
            mother=mother,
            birth_date=animal_record.birth_date,
            birth_location=animal_record.birth_location,
            birth_location_latitude=animal_record.birth_location_latitude,
            birth_location_longitude=animal_record.birth_location_longitude,
            birth_location_accuracy=accuracy,
        )

        update_or_create_obj(
            Animal,
            name=animal_record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=defaults)

    # get the validation summary for animals and reset its counts
    get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal").reset_all_count()

    logger.info("fill_uid_animals() completed")
Exemple #13
0
def fill_uid_samples(submission_obj, template):
    """Create or update the UID Sample objects listed in an excel template.

    Every record returned by ``template.get_sample_records()`` is linked to
    its Animal (found through specie, breed and owner), enriched with
    dictionary terms and passed to ``update_or_create_obj``. Finally the
    "sample" ValidationSummary of the submission is fetched or created and
    its counts are reset.

    Args:
        submission_obj: submission owning the samples; its
            ``gene_bank_country.label`` is used as language.
        template: object exposing ``get_sample_records()``,
            ``get_animal_from_sample()`` and ``get_breed_from_animal()``.
    """

    logger.info("called fill_uid_samples()")

    # language of this submission
    language = submission_obj.gene_bank_country.label

    for sample_record in template.get_sample_records():
        # the animal is described in a different record of the template
        animal_record = template.get_animal_from_sample(sample_record)

        # resolve specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_record.species,
            language=language)

        logger.debug("Found '%s' as specie" % (specie))

        # the breed record comes from the animal record
        breed_record = template.get_breed_from_animal(animal_record)

        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        # the real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=breed_country)

        logger.debug("Selected breed is %s" % (breed))

        animal = Animal.objects.get(
            name=animal_record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner)

        logger.debug("Selected animal is %s" % (animal))

        # the organism part label is used as provided by the record
        organism_part = get_or_create_obj(
            DictUberon, label=sample_record.organism_part)

        # developmental and physiological stages are not mandatory
        devel_stage = (
            get_or_create_obj(
                DictDevelStage, label=sample_record.developmental_stage)
            if sample_record.developmental_stage else None)

        physio_stage = (
            get_or_create_obj(
                DictPhysioStage, label=sample_record.physiological_stage)
            if sample_record.physiological_stage else None)

        # deal with time columns
        (animal_age_at_collection, time_units, preparation_interval,
         preparation_interval_units) = parse_times(sample_record, animal)

        # translate the accuracy description into its value
        accuracy = ACCURACIES.get_value_by_desc(
            sample_record.collection_place_accuracy)

        # storage and storage processing
        # TODO; check those values in excel columns
        storage = SAMPLE_STORAGE.get_value_by_desc(
            sample_record.sample_storage)

        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            sample_record.sample_storage_processing)

        # attributes passed as defaults, to avoid collisions when updating
        # data
        defaults = dict(
            alternative_id=sample_record.alternative_sample_id,
            description=sample_record.sample_description,
            protocol=sample_record.specimen_collection_protocol,
            collection_date=sample_record.collection_date,
            collection_place_latitude=(
                sample_record.collection_place_latitude),
            collection_place_longitude=(
                sample_record.collection_place_longitude),
            collection_place=sample_record.collection_place,
            collection_place_accuracy=accuracy,
            organism_part=organism_part,
            developmental_stage=devel_stage,
            physiological_stage=physio_stage,
            animal_age_at_collection=animal_age_at_collection,
            animal_age_at_collection_units=time_units,
            availability=sample_record.availability,
            storage=storage,
            storage_processing=storage_processing,
            preparation_interval=preparation_interval,
            preparation_interval_units=preparation_interval_units,
        )

        update_or_create_obj(
            Sample,
            name=sample_record.sample_id_in_data_source,
            animal=animal,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=defaults)

    # get the validation summary for samples and reset its counts
    get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample").reset_all_count()

    logger.info("fill_uid_samples() completed")
Exemple #14
0
def upload_crbanim(submission):
    """Import the CRB-Anim file uploaded with ``submission``.

    The uploaded file is read with a CRBAnimReader, then every record is
    processed. Any exception raised while loading data is recorded on the
    submission (ERROR status plus message), notified with ``send_message``
    and logged. On success the submission is marked as LOADED and a message
    with the validation summary is sent.

    Args:
        submission: the submission instance to import data for.

    Returns:
        bool: True when data were imported, False when loading failed.
    """

    logger.info("Importing from CRB-Anim file")

    # this is the full path in docker container
    fullpath = submission.get_uploaded_file_path()

    # read submission data (errors raised here are not handled)
    reader = CRBAnimReader()
    reader.read_file(fullpath)

    try:
        # check UID data like cryoweb does
        check_UID(submission, reader)

        # language from submission (useful for translation)
        # HINT: no traslations implemented, at the moment
        language = submission.gene_bank_country.label

        # a dictionary in which animal data are stored while processing
        animals = {}

        for record in reader.data:
            process_record(record, submission, animals, language)

        # after processing records, initialize a validation summary for
        # each type and reset its counts
        for summary_type in ("animal", "sample"):
            summary = get_or_create_obj(
                ValidationSummary,
                submission=submission,
                type=summary_type)

            summary.reset_all_count()

    except Exception as exc:
        # save the error in database
        submission.status = ERROR
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.save()

        # send async message
        send_message(submission)

        logger.error("error in importing from crbanim: %s" % (exc))
        logger.exception(exc)

        return False

    # reached only when no exception occurred, since the handler returns
    submission.message = "CRBAnim import completed for submission: %s" % (
        submission.id)
    submission.status = LOADED
    submission.save()

    # send async message
    send_message(
        submission,
        validation_message=construct_validation_message(submission))

    logger.info("Import from CRBAnim is complete")

    return True
Exemple #15
0
def fill_uid_samples(submission):
    """Helper function to fill sample data in UID sample table.

    Every VVessels row is linked to its Animal (found through name, breed
    and owner) and passed to ``update_or_create_obj`` for Sample. Finally
    the "sample" ValidationSummary of the submission is fetched or created
    and its counts are reset.

    Args:
        submission: submission owning the samples; its
            ``gene_bank_country.label`` is used as language.
    """

    logger.info("called fill_uid_samples()")

    # language of this submission
    language = submission.gene_bank_country.label

    for vessel in VVessels.objects.all():
        # the animal this sample was taken from
        vessel_animal = vessel.get_animal()

        # only the breed is required to find the animal
        _, breed = get_animal_specie_and_breed(vessel_animal, language)

        # find the animal object using its name
        animal = Animal.objects.get(
            name=vessel_animal.ext_animal,
            breed=breed,
            owner=submission.owner)

        # organism parts need to be in lowercase
        organism_part = get_or_create_obj(
            DictUberon,
            label=vessel.get_organism_part().lower())

        # derive animal age at collection from production and birth dates
        # (the original note says the helper deals with NULL values)
        animal_age_at_collection, time_units = image_timedelta(
            vessel.production_dt, vessel_animal.birth_dt)

        # attributes passed as defaults, to avoid collisions when updating
        # data
        defaults = dict(
            alternative_id=vessel.db_vessel,
            collection_date=vessel.production_dt,
            organism_part=organism_part,
            description=vessel.comment,
            animal_age_at_collection=animal_age_at_collection,
            animal_age_at_collection_units=time_units,
        )

        # the sample is named after the vessel
        update_or_create_obj(
            Sample,
            name=vessel.ext_vessel,
            animal=animal,
            owner=submission.owner,
            submission=submission,
            defaults=defaults)

    # get the validation summary for samples and reset its counts
    get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="sample").reset_all_count()

    logger.info("fill_uid_samples() completed")
Exemple #16
0
def fill_uid_animals(submission):
    """Helper function to fill animal data in UID animal table.

    Every VAnimal row is resolved to its breed, parents and sex and passed
    to ``update_or_create_obj`` for Animal. Finally the "animal"
    ValidationSummary of the submission is fetched or created and its
    counts are reset.

    Args:
        submission: submission owning the animals; its
            ``gene_bank_country.label`` is used as language.

    Raises:
        CryoWebImportError: when an animal sex is neither 'm' nor 'f'.
    """

    logger.info("called fill_uid_animals()")

    # language of this submission
    language = submission.gene_bank_country.label

    # cryoweb sex codes mapped to the DictSex objects read from database
    sex_by_code = {
        'm': DictSex.objects.get(label="male"),
        'f': DictSex.objects.get(label="female"),
    }

    for v_animal in VAnimal.objects.all():
        # only the breed is required in this function
        _, breed = get_animal_specie_and_breed(v_animal, language)

        logger.debug("Getting %s as my name" % (v_animal.ext_animal))

        logger.debug("Getting %s as father" % (v_animal.ext_sire))

        # father is None when no matching animal is found
        father = Animal.objects.filter(
            name=v_animal.ext_sire,
            breed=breed,
            owner=submission.owner).first()

        logger.debug("Getting %s as mother" % (v_animal.ext_dam))

        # mother is None when no matching animal is found
        mother = Animal.objects.filter(
            name=v_animal.ext_dam,
            breed=breed,
            owner=submission.owner).first()

        # determine sex: any code different from 'm' and 'f' is an error
        sex = sex_by_code.get(v_animal.ext_sex)

        if sex is None:
            raise CryoWebImportError(
                "Unknown sex '%s' for '%s'" % (v_animal.ext_sex, v_animal))

        # HINT: this is not sufficient for validation, since a birth
        # location as text is also needed to have a valid birth location.
        # Cryoweb with coordinates will always fail validation
        if v_animal.latitude and v_animal.longitude:
            accuracy = UNKNOWN

        else:
            accuracy = MISSING

        # attributes passed as defaults, to avoid collisions when updating
        # data
        defaults = dict(
            alternative_id=v_animal.db_animal,
            sex=sex,
            father=father,
            mother=mother,
            birth_date=v_animal.birth_dt,
            birth_location_latitude=v_animal.latitude,
            birth_location_longitude=v_animal.longitude,
            birth_location_accuracy=accuracy,
            description=v_animal.comment,
        )

        # update or create the animal object
        update_or_create_obj(
            Animal,
            name=v_animal.ext_animal,
            breed=breed,
            owner=submission.owner,
            submission=submission,
            defaults=defaults)

    # get the validation summary for animals and reset its counts
    get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="animal").reset_all_count()

    logger.info("fill_uid_animals() completed")