def fill_uid_breeds(submission_obj, template):
    """Create the DictBreed entries described by an excel template.

    Each record returned by ``template.get_breed_records()`` is resolved
    to a DictSpecie and a DictCountry, then the matching DictBreed is
    fetched or created through get_or_create_obj().
    """

    logger.info("fill_uid_breeds() started")

    # the gene bank country label acts as language when resolving
    # species synonyms
    language = submission_obj.gene_bank_country.label

    for breed_record in template.get_breed_records():
        # species are in latin names, but a common name can also be
        # found through the translation tables
        breed_specie = DictSpecie.get_specie_check_synonyms(
            species_label=breed_record.species,
            language=language)

        # ideally the same country of the submission, however data from
        # other countries could be stored
        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        get_or_create_obj(
            DictBreed,
            supplied_breed=breed_record.supplied_breed,
            specie=breed_specie,
            country=breed_country)

    logger.info("fill_uid_breeds() completed")
def fill_Organization():
    """Fill organization table

    Read ``organization_list.csv`` (semicolon delimited, three columns
    mapped to id, name and country) from the directory of this module.
    For every row the country is fetched or created, then the
    organization is fetched or created with a standardized name and the
    submitter role returned by fill_DictRoles().
    """

    base_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(base_dir, "organization_list.csv")

    Data = collections.namedtuple('Data', "id name country")

    # the context manager closes the file even if a row fails while
    # being processed; newline='' is what the csv module requires for
    # file objects handed to csv.reader
    with open(filename, newline='') as handle:
        reader = csv.reader(handle, delimiter=";")

        # get a submitter role
        submitter = fill_DictRoles()

        for row in map(Data._make, reader):
            # get a country object
            country = get_or_create_obj(DictCountry, label=row.country)

            # HINT: could be better to fix organization names in
            # organization_list?
            get_or_create_obj(
                Organization,
                name=standardize_institute_name(row.name),
                role=submitter,
                country=country)
def fill_uid_breeds(submission):
    """Create UID DictBreed objects from every VBreedsSpecies row.

    Requires a submission instance: its gene bank country label is used
    as language when resolving species synonyms.
    """

    logger.info("fill_uid_breeds() started")

    language = submission.gene_bank_country.label

    for breed_view in VBreedsSpecies.objects.all():
        # species are resolved through synonyms, so the dictionary
        # tables need to be already filled
        breed_specie = DictSpecie.get_by_synonym(
            synonym=breed_view.ext_species,
            language=language)

        # ideally the same country of the submission, however data from
        # other countries could be stored
        breed_country = DictCountry.objects.get(
            label=breed_view.efabis_country)

        # the breed is created only when missing
        get_or_create_obj(
            DictBreed,
            supplied_breed=breed_view.efabis_mcname,
            specie=breed_specie,
            country=breed_country)

    logger.info("fill_uid_breeds() completed")
def fill_OrganismParts():
    """Insert the manually curated organism parts into DictUberon"""

    # (label, ontology term) pairs
    curated_parts = (
        ('strand of hair', "UBERON_0001037"),
    )

    for part_label, part_term in curated_parts:
        get_or_create_obj(
            DictUberon,
            label=part_label,
            term=part_term,
            confidence=CURATED)
def fill_uid_sample(record, animal, submission):
    """Create or update the UID Sample described by a record.

    The sample is identified by its name (``record.sample_identifier``),
    the given animal, the submission owner and the submission itself;
    every other attribute is passed as defaults. Returns the object
    returned by update_or_create_obj().
    """

    sample_type_name = record.sample_type_name.lower()
    body_part_name = record.body_part_name.lower()

    # decision rule proposed by sylvain: the body part is the organism
    # part unless it carries no information, in which case the sample
    # type is used instead
    if body_part_name in ("unknown", "not relevant"):
        organism_part_label = sample_type_name
    else:
        organism_part_label = body_part_name

    # labels are already lowercase at this point
    organism_part = get_or_create_obj(DictUberon, label=organism_part_label)

    # animal age at collection is derived from the two parsed dates
    birth_date = parse_date(record.animal_birth_date)
    collection_date = parse_date(record.sampling_date)
    age_at_collection, age_units = image_timedelta(
        collection_date, birth_date)

    # a publication is attached only when a reference is provided
    publication = None
    references = record.sample_bibliographic_references
    if references:
        publication = get_or_create_obj(Publication, doi=references)

    # attributes are passed as defaults to avoid collisions when
    # updating data
    sample_attributes = {
        # HINT: is a duplication of name. Can this be non-mandatory?
        'alternative_id': record.sample_identifier,
        'collection_date': record.sampling_date,
        'protocol': record.sampling_protocol_url,
        'organism_part': organism_part,
        'storage': find_storage_type(record),
        'availability': sanitize_url(record.sample_availability),
        'animal_age_at_collection': age_at_collection,
        'animal_age_at_collection_units': age_units,
        'publication': publication,
    }

    return update_or_create_obj(
        Sample,
        name=record.sample_identifier,
        animal=animal,
        owner=submission.owner,
        submission=submission,
        defaults=sample_attributes)
def fill_Countries():
    """Insert curated countries and return the default one.

    The returned country (United Kingdom) is the one used as default
    language for translations.
    """

    # (label, term) pairs: the first one is the default country, the
    # other is a country difficult to annotate with zooma
    curated_countries = (
        ('United Kingdom', 'NCIT_C17233'),
        ('Colombia', 'NCIT_C16449'),
    )

    created = []

    for country_label, country_term in curated_countries:
        created.append(
            get_or_create_obj(
                DictCountry,
                label=country_label,
                term=country_term,
                confidence=CURATED))

    return created[0]
def fill_SpeciesAndSynonyms():
    """Populate species and their synonyms from the cryoweb word list.

    For every (common name, latin name) pair the DictSpecie is fetched
    or created, its general breed is refreshed when
    get_general_breed_by_species() returns a different label, and a
    SpecieSynonym is fetched or created for the default language.
    """

    # countries are inserted here; the returned one is the default
    # language
    language = fill_Countries()

    # cryoweb DE species and synonyms: common name -> latin name
    cryoweb = {
        'Cattle': 'Bos taurus',
        'Chicken': 'Gallus gallus',
        'Deer': 'Cervidae',
        'Duck (domestic)': 'Anas platyrhynchos',
        'Goat': 'Capra hircus',
        'Goose (domestic)': 'Anser anser',
        'Horse': 'Equus caballus',
        'Pig': 'Sus scrofa',
        'Rabbit': 'Oryctolagus cuniculus',
        'Sheep': 'Ovis aries',
        'Turkey': 'Meleagris gallopavo',
        'Rainbow trout': 'Oncorhynchus mykiss',
        'Goose': 'Anser anser',
        'Dog': 'Canis lupus familiaris',
    }

    for common_name, latin_name in cryoweb.items():
        dict_specie = get_or_create_obj(DictSpecie, label=latin_name)

        general_breed = get_general_breed_by_species(latin_name)

        # an empty dictionary means there is nothing to update
        if general_breed != {}:
            breed_label = general_breed['text']

            # keep only the piece after the last "/"
            breed_term = general_breed['ontologyTerms'].rsplit("/", 1)[-1]

            if dict_specie.general_breed_label != breed_label:
                dict_specie.general_breed_label = breed_label
                dict_specie.general_breed_term = breed_term
                dict_specie.save()

                logger.info("Added general breed: %s" % (breed_label))

        get_or_create_obj(
            SpecieSynonym,
            dictspecie=dict_specie,
            language=language,
            word=common_name)
def fill_Species():
    """Insert the manually curated species into DictSpecie"""

    # (label, term) pairs
    curated_species = (
        ('Crassostrea gigas', 'NCBITaxon_29159'),
        ('Equus asinus', 'NCBITaxon_9793'),
        ('Oncorhynchus mykiss', 'NCBITaxon_8022'),
        ('Canis lupus familiaris', 'NCBITaxon_9615'),
    )

    for specie_label, specie_term in curated_species:
        get_or_create_obj(
            DictSpecie,
            confidence=CURATED,
            label=specie_label,
            term=specie_term)
def fill_DictSex():
    """Insert the three DictSex objects (male, female, unknown sex)"""

    # (label, term) pairs
    sexes = (
        ('male', 'PATO_0000384'),
        ('female', 'PATO_0000383'),
        ('record of unknown sex', 'OBI_0000858'),
    )

    for sex_label, sex_term in sexes:
        get_or_create_obj(DictSex, label=sex_label, term=sex_term)
def fill_uid_breed(record, language):
    """Get or create the DictBreed described by a crbanim record.

    Returns the DictBreed object.
    """

    # species are in latin names, but a common name can also be found
    # through the translation tables
    breed_specie = DictSpecie.get_specie_check_synonyms(
        species_label=record.species_latin_name,
        language=language)

    # the record stores a two letter code: pycountry gives the name
    origin = pycountry.countries.get(alpha_2=record.country_of_origin)

    # ideally the same country of the submission, however data from
    # other countries could be stored
    breed_country = DictCountry.objects.get(label=origin.name)

    return get_or_create_obj(
        DictBreed,
        supplied_breed=record.breed_name,
        specie=breed_specie,
        country=breed_country)
def fill_DictRoles():
    """Insert the known roles and return the submitter one"""

    # the submitter role is the one handed back to the caller
    submitter = get_or_create_obj(
        DictRole, label='submitter', term='EFO_0001741')

    # remaining roles as (label, term) pairs
    other_roles = (
        ('administrator', 'EFO_0009743'),
        ('clinician', 'EFO_0009740'),
        ('curator', 'EFO_0001733'),
        ('funder', 'EFO_0001736'),
        ('investigator', 'EFO_0001739'),
        ('technician', 'EFO_0009739'),
    )

    for role_label, role_term in other_roles:
        get_or_create_obj(DictRole, label=role_label, term=role_term)

    return submitter
def fill_uid_animals(submission_obj, template):
    """Create or update UID animals from an excel template.

    Every record returned by ``template.get_animal_records()`` is
    resolved against the dictionary tables (sex, specie, country, breed)
    and written with update_or_create_obj(). When all records are
    processed, the "animal" ValidationSummary of the submission gets its
    counts reset.
    """

    logger.info("called fill_uid_animals()")

    language = submission_obj.gene_bank_country.label

    for animal_record in template.get_animal_records():
        # sex labels are matched case-insensitively
        animal_sex = DictSex.objects.get(label__iexact=animal_record.sex)

        # mind synonyms while searching for a specie
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_record.species,
            language=language)

        logger.debug("Found '%s' as specie" % (specie))

        # the template knows which breed record belongs to this animal
        breed_record = template.get_breed_from_animal(animal_record)

        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=breed_country)

        logger.debug("Selected breed is %s" % (breed))

        # parents are resolved only when an identifier is provided
        father = None

        if animal_record.father_id_in_data_source:
            logger.debug("Getting %s as father" % (
                animal_record.father_id_in_data_source))

            father = get_relationship(
                animal_record.animal_id_in_data_source,
                animal_record.father_id_in_data_source,
                breed,
                submission_obj.owner)

        mother = None

        if animal_record.mother_id_in_data_source:
            logger.debug("Getting %s as mother" % (
                animal_record.mother_id_in_data_source))

            mother = get_relationship(
                animal_record.animal_id_in_data_source,
                animal_record.mother_id_in_data_source,
                breed,
                submission_obj.owner)

        location_accuracy = ACCURACIES.get_value_by_desc(
            animal_record.birth_location_accuracy)

        # attributes are passed as defaults to avoid collisions when
        # updating data
        animal_attributes = {
            'alternative_id': animal_record.alternative_animal_id,
            'description': animal_record.animal_description,
            'sex': animal_sex,
            'father': father,
            'mother': mother,
            'birth_date': animal_record.birth_date,
            'birth_location': animal_record.birth_location,
            'birth_location_latitude': (
                animal_record.birth_location_latitude),
            'birth_location_longitude': (
                animal_record.birth_location_longitude),
            'birth_location_accuracy': location_accuracy,
        }

        update_or_create_obj(
            Animal,
            name=animal_record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=animal_attributes)

    # get (or create) the animal validation summary and reset its counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal")

    summary.reset_all_count()

    logger.info("fill_uid_animals() completed")
def fill_uid_samples(submission_obj, template):
    """Create or update UID samples from an excel template.

    Every record returned by ``template.get_sample_records()`` is linked
    to its (already loaded) animal and written with
    update_or_create_obj(). When all records are processed, the "sample"
    ValidationSummary of the submission gets its counts reset.
    """

    logger.info("called fill_uid_samples()")

    language = submission_obj.gene_bank_country.label

    for sample_record in template.get_sample_records():
        # the template knows which animal record this sample belongs to
        animal_record = template.get_animal_from_sample(sample_record)

        # mind synonyms while searching for a specie
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_record.species,
            language=language)

        logger.debug("Found '%s' as specie" % (specie))

        breed_record = template.get_breed_from_animal(animal_record)

        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=breed_country)

        logger.debug("Selected breed is %s" % (breed))

        animal = Animal.objects.get(
            name=animal_record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner)

        logger.debug("Selected animal is %s" % (animal))

        # the label is used exactly as supplied by the template
        organism_part = get_or_create_obj(
            DictUberon,
            label=sample_record.organism_part)

        # developmental and physiological stages are not mandatory
        devel_stage = None

        if sample_record.developmental_stage:
            devel_stage = get_or_create_obj(
                DictDevelStage,
                label=sample_record.developmental_stage)

        physio_stage = None

        if sample_record.physiological_stage:
            physio_stage = get_or_create_obj(
                DictPhysioStage,
                label=sample_record.physiological_stage)

        # time columns come as (value, units) pairs for both the animal
        # age and the preparation interval
        times = parse_times(sample_record, animal)
        (age_at_collection, age_units,
         preparation_interval, preparation_interval_units) = times

        place_accuracy = ACCURACIES.get_value_by_desc(
            sample_record.collection_place_accuracy)

        # TODO; check those values in excel columns
        storage = SAMPLE_STORAGE.get_value_by_desc(
            sample_record.sample_storage)

        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            sample_record.sample_storage_processing)

        # attributes are passed as defaults to avoid collisions when
        # updating data
        sample_attributes = {
            'alternative_id': sample_record.alternative_sample_id,
            'description': sample_record.sample_description,
            'protocol': sample_record.specimen_collection_protocol,
            'collection_date': sample_record.collection_date,
            'collection_place_latitude': (
                sample_record.collection_place_latitude),
            'collection_place_longitude': (
                sample_record.collection_place_longitude),
            'collection_place': sample_record.collection_place,
            'collection_place_accuracy': place_accuracy,
            'organism_part': organism_part,
            'developmental_stage': devel_stage,
            'physiological_stage': physio_stage,
            'animal_age_at_collection': age_at_collection,
            'animal_age_at_collection_units': age_units,
            'availability': sample_record.availability,
            'storage': storage,
            'storage_processing': storage_processing,
            'preparation_interval': preparation_interval,
            'preparation_interval_units': preparation_interval_units,
        }

        update_or_create_obj(
            Sample,
            name=sample_record.sample_id_in_data_source,
            animal=animal,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=sample_attributes)

    # get (or create) the sample validation summary and reset its counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample")

    summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
def upload_crbanim(submission):
    """Import the CRB-Anim file uploaded with a submission.

    Returns True when data are loaded: the submission status is set to
    LOADED and a message with the validation summary is sent. Returns
    False when an exception is raised while loading: the submission
    status is set to ERROR and the error is stored as its message.
    Reading the file happens outside the guarded section, so a failure
    there is propagated to the caller.
    """

    logger.info("Importing from CRB-Anim file")

    # this is the full path in docker container
    uploaded_path = submission.get_uploaded_file_path()

    crbanim_reader = CRBAnimReader()
    crbanim_reader.read_file(uploaded_path)

    try:
        # check UID data like cryoweb does
        check_UID(submission, crbanim_reader)

        # HINT: no translations implemented, at the moment
        language = submission.gene_bank_country.label

        # animal data are collected here while processing records
        animals = {}

        for record in crbanim_reader.data:
            process_record(record, submission, animals, language)

        # after processing records, get (or create) both validation
        # summaries and reset their counts
        for summary_type in ("animal", "sample"):
            summary = get_or_create_obj(
                ValidationSummary,
                submission=submission,
                type=summary_type)

            summary.reset_all_count()

    except Exception as exc:
        # track the failure in database
        submission.status = ERROR
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.save()

        # send async message
        send_message(submission)

        logger.error("error in importing from crbanim: %s" % (exc))
        logger.exception(exc)

        return False

    # reached only when no exception was raised
    submission.message = "CRBAnim import completed for submission: %s" % (
        submission.id)
    submission.status = LOADED
    submission.save()

    # send async message
    send_message(
        submission,
        validation_message=construct_validation_message(submission))

    logger.info("Import from CRBAnim is complete")

    return True
def fill_uid_samples(submission):
    """Helper function to fill sample data in UID sample table

    Every VVessels row is linked to its (already loaded) animal and
    written with update_or_create_obj(). When all rows are processed,
    the "sample" ValidationSummary of the submission gets its counts
    reset.
    """

    logger.info("called fill_uid_samples()")

    language = submission.gene_bank_country.label

    for vessel in VVessels.objects.all():
        sample_name = vessel.ext_vessel

        # the animal this sample was collected from
        vessel_animal = vessel.get_animal()

        # only the breed is needed to select the animal
        _, breed = get_animal_specie_and_breed(vessel_animal, language)

        animal = Animal.objects.get(
            name=vessel_animal.ext_animal,
            breed=breed,
            owner=submission.owner)

        # organism parts need to be in lowercase
        part_label = vessel.get_organism_part().lower()
        organism_part = get_or_create_obj(DictUberon, label=part_label)

        # image_timedelta deals with NULL values
        age_at_collection, age_units = image_timedelta(
            vessel.production_dt, vessel_animal.birth_dt)

        # attributes are passed as defaults to avoid collisions when
        # updating data. Protocol and storage are not set here
        sample_attributes = {
            'alternative_id': vessel.db_vessel,
            'collection_date': vessel.production_dt,
            'organism_part': organism_part,
            'description': vessel.comment,
            'animal_age_at_collection': age_at_collection,
            'animal_age_at_collection_units': age_units,
        }

        update_or_create_obj(
            Sample,
            name=sample_name,
            animal=animal,
            owner=submission.owner,
            submission=submission,
            defaults=sample_attributes)

    # get (or create) the sample validation summary and reset its counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="sample")

    summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
def fill_uid_animals(submission):
    """Helper function to fill animal data in UID animal table

    Every VAnimal row is written with update_or_create_obj(). A
    CryoWebImportError is raised when the sex of an animal is neither
    'm' nor 'f'. When all rows are processed, the "animal"
    ValidationSummary of the submission gets its counts reset.
    """

    logger.info("called fill_uid_animals()")

    language = submission.gene_bank_country.label

    # cryoweb sex codes mapped to the DictSex objects from database
    male = DictSex.objects.get(label="male")
    female = DictSex.objects.get(label="female")
    sex_by_code = {'m': male, 'f': female}

    for cryoweb_animal in VAnimal.objects.all():
        # only the breed is needed to select parents and the animal
        _, breed = get_animal_specie_and_breed(cryoweb_animal, language)

        logger.debug("Getting %s as my name" % (cryoweb_animal.ext_animal))
        logger.debug("Getting %s as father" % (cryoweb_animal.ext_sire))

        # father is None when no animal matches
        father = Animal.objects.filter(
            name=cryoweb_animal.ext_sire,
            breed=breed,
            owner=submission.owner).first()

        logger.debug("Getting %s as mother" % (cryoweb_animal.ext_dam))

        # mother is None when no animal matches
        mother = Animal.objects.filter(
            name=cryoweb_animal.ext_dam,
            breed=breed,
            owner=submission.owner).first()

        # any code other than 'm' or 'f' stops the import
        if cryoweb_animal.ext_sex not in sex_by_code:
            raise CryoWebImportError(
                "Unknown sex '%s' for '%s'" % (
                    cryoweb_animal.ext_sex, cryoweb_animal))

        sex = sex_by_code[cryoweb_animal.ext_sex]

        # HINT: this is not sufficient for validation, since a birth
        # location as text is also needed to have a valid birth location.
        # Cryoweb with coordinates will always fail validation
        has_coordinates = cryoweb_animal.latitude and cryoweb_animal.longitude
        accuracy = UNKNOWN if has_coordinates else MISSING

        # attributes are passed as defaults to avoid collisions when
        # updating data
        animal_attributes = {
            'alternative_id': cryoweb_animal.db_animal,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': cryoweb_animal.birth_dt,
            'birth_location_latitude': cryoweb_animal.latitude,
            'birth_location_longitude': cryoweb_animal.longitude,
            'birth_location_accuracy': accuracy,
            'description': cryoweb_animal.comment,
        }

        update_or_create_obj(
            Animal,
            name=cryoweb_animal.ext_animal,
            breed=breed,
            owner=submission.owner,
            submission=submission,
            defaults=animal_attributes)

    # get (or create) the animal validation summary and reset its counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="animal")

    summary.reset_all_count()

    logger.info("fill_uid_animals() completed")